author     duke <none@none>  2007-12-01 00:00:00 +0000
committer  duke <none@none>  2007-12-01 00:00:00 +0000
commit     fa6b5a8027b86d2f8a200e72b4ef6a0d3f9189d3
tree       8376f6e5c41e70162b5867d9e1fea3f17f540473  /src/share/vm/opto
Initial load    jdk7-b24
Diffstat (limited to 'src/share/vm/opto')
-rw-r--r--  src/share/vm/opto/addnode.cpp  871
-rw-r--r--  src/share/vm/opto/addnode.hpp  239
-rw-r--r--  src/share/vm/opto/adlcVMDeps.hpp  44
-rw-r--r--  src/share/vm/opto/block.cpp  952
-rw-r--r--  src/share/vm/opto/block.hpp  510
-rw-r--r--  src/share/vm/opto/buildOopMap.cpp  623
-rw-r--r--  src/share/vm/opto/bytecodeInfo.cpp  490
-rw-r--r--  src/share/vm/opto/c2_globals.cpp  28
-rw-r--r--  src/share/vm/opto/c2_globals.hpp  382
-rw-r--r--  src/share/vm/opto/c2compiler.cpp  129
-rw-r--r--  src/share/vm/opto/c2compiler.hpp  56
-rw-r--r--  src/share/vm/opto/callGenerator.cpp  744
-rw-r--r--  src/share/vm/opto/callGenerator.hpp  266
-rw-r--r--  src/share/vm/opto/callnode.cpp  1311
-rw-r--r--  src/share/vm/opto/callnode.hpp  814
-rw-r--r--  src/share/vm/opto/cfgnode.cpp  1954
-rw-r--r--  src/share/vm/opto/cfgnode.hpp  481
-rw-r--r--  src/share/vm/opto/chaitin.cpp  2042
-rw-r--r--  src/share/vm/opto/chaitin.hpp  501
-rw-r--r--  src/share/vm/opto/classes.cpp  34
-rw-r--r--  src/share/vm/opto/classes.hpp  308
-rw-r--r--  src/share/vm/opto/coalesce.cpp  915
-rw-r--r--  src/share/vm/opto/coalesce.hpp  109
-rw-r--r--  src/share/vm/opto/compile.cpp  2384
-rw-r--r--  src/share/vm/opto/compile.hpp  720
-rw-r--r--  src/share/vm/opto/connode.cpp  1227
-rw-r--r--  src/share/vm/opto/connode.hpp  578
-rw-r--r--  src/share/vm/opto/divnode.cpp  1031
-rw-r--r--  src/share/vm/opto/divnode.hpp  177
-rw-r--r--  src/share/vm/opto/doCall.cpp  862
-rw-r--r--  src/share/vm/opto/domgraph.cpp  664
-rw-r--r--  src/share/vm/opto/escape.cpp  1346
-rw-r--r--  src/share/vm/opto/escape.hpp  319
-rw-r--r--  src/share/vm/opto/gcm.cpp  1767
-rw-r--r--  src/share/vm/opto/generateOptoStub.cpp  291
-rw-r--r--  src/share/vm/opto/graphKit.cpp  3146
-rw-r--r--  src/share/vm/opto/graphKit.hpp  720
-rw-r--r--  src/share/vm/opto/idealGraphPrinter.cpp  1919
-rw-r--r--  src/share/vm/opto/idealGraphPrinter.hpp  323
-rw-r--r--  src/share/vm/opto/idealKit.cpp  503
-rw-r--r--  src/share/vm/opto/idealKit.hpp  230
-rw-r--r--  src/share/vm/opto/ifg.cpp  813
-rw-r--r--  src/share/vm/opto/ifnode.cpp  922
-rw-r--r--  src/share/vm/opto/indexSet.cpp  573
-rw-r--r--  src/share/vm/opto/indexSet.hpp  461
-rw-r--r--  src/share/vm/opto/lcm.cpp  934
-rw-r--r--  src/share/vm/opto/library_call.cpp  4921
-rw-r--r--  src/share/vm/opto/live.cpp  314
-rw-r--r--  src/share/vm/opto/live.hpp  74
-rw-r--r--  src/share/vm/opto/locknode.cpp  122
-rw-r--r--  src/share/vm/opto/locknode.hpp  97
-rw-r--r--  src/share/vm/opto/loopTransform.cpp  1729
-rw-r--r--  src/share/vm/opto/loopUnswitch.cpp  237
-rw-r--r--  src/share/vm/opto/loopnode.cpp  2886
-rw-r--r--  src/share/vm/opto/loopnode.hpp  919
-rw-r--r--  src/share/vm/opto/loopopts.cpp  2677
-rw-r--r--  src/share/vm/opto/machnode.cpp  707
-rw-r--r--  src/share/vm/opto/machnode.hpp  826
-rw-r--r--  src/share/vm/opto/macro.cpp  995
-rw-r--r--  src/share/vm/opto/macro.hpp  107
-rw-r--r--  src/share/vm/opto/matcher.cpp  2123
-rw-r--r--  src/share/vm/opto/matcher.hpp  392
-rw-r--r--  src/share/vm/opto/memnode.cpp  3222
-rw-r--r--  src/share/vm/opto/memnode.hpp  1062
-rw-r--r--  src/share/vm/opto/mulnode.cpp  1310
-rw-r--r--  src/share/vm/opto/mulnode.hpp  247
-rw-r--r--  src/share/vm/opto/multnode.cpp  129
-rw-r--r--  src/share/vm/opto/multnode.hpp  81
-rw-r--r--  src/share/vm/opto/node.cpp  1919
-rw-r--r--  src/share/vm/opto/node.hpp  1492
-rw-r--r--  src/share/vm/opto/opcodes.cpp  42
-rw-r--r--  src/share/vm/opto/opcodes.hpp  43
-rw-r--r--  src/share/vm/opto/optoreg.hpp  194
-rw-r--r--  src/share/vm/opto/output.cpp  2680
-rw-r--r--  src/share/vm/opto/output.hpp  215
-rw-r--r--  src/share/vm/opto/parse.hpp  555
-rw-r--r--  src/share/vm/opto/parse1.cpp  2166
-rw-r--r--  src/share/vm/opto/parse2.cpp  2171
-rw-r--r--  src/share/vm/opto/parse3.cpp  463
-rw-r--r--  src/share/vm/opto/parseHelper.cpp  520
-rw-r--r--  src/share/vm/opto/phase.cpp  164
-rw-r--r--  src/share/vm/opto/phase.hpp  113
-rw-r--r--  src/share/vm/opto/phaseX.cpp  1758
-rw-r--r--  src/share/vm/opto/phaseX.hpp  516
-rw-r--r--  src/share/vm/opto/postaloc.cpp  584
-rw-r--r--  src/share/vm/opto/reg_split.cpp  1300
-rw-r--r--  src/share/vm/opto/regalloc.cpp  127
-rw-r--r--  src/share/vm/opto/regalloc.hpp  133
-rw-r--r--  src/share/vm/opto/regmask.cpp  288
-rw-r--r--  src/share/vm/opto/regmask.hpp  264
-rw-r--r--  src/share/vm/opto/rootnode.cpp  81
-rw-r--r--  src/share/vm/opto/rootnode.hpp  62
-rw-r--r--  src/share/vm/opto/runtime.cpp  1177
-rw-r--r--  src/share/vm/opto/runtime.hpp  289
-rw-r--r--  src/share/vm/opto/split_if.cpp  536
-rw-r--r--  src/share/vm/opto/subnode.cpp  1206
-rw-r--r--  src/share/vm/opto/subnode.hpp  501
-rw-r--r--  src/share/vm/opto/superword.cpp  2025
-rw-r--r--  src/share/vm/opto/superword.hpp  506
-rw-r--r--  src/share/vm/opto/type.cpp  3751
-rw-r--r--  src/share/vm/opto/type.hpp  1124
-rw-r--r--  src/share/vm/opto/vectornode.cpp  478
-rw-r--r--  src/share/vm/opto/vectornode.hpp  1134
103 files changed, 91467 insertions, 0 deletions
diff --git a/src/share/vm/opto/addnode.cpp b/src/share/vm/opto/addnode.cpp
new file mode 100644
index 000000000..42a17c997
--- /dev/null
+++ b/src/share/vm/opto/addnode.cpp
@@ -0,0 +1,871 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+#include "incls/_precompiled.incl"
+#include "incls/_addnode.cpp.incl"
+
+#define MAXFLOAT ((float)3.40282346638528860e+38)
+
+// Classic Add functionality. This covers all the usual 'add' behaviors for
+// an algebraic ring. Add-integer, add-float, add-double, and binary-or are
+// all inherited from this class. The various identity values are supplied
+// by virtual functions.
+
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+// Hash function over AddNodes. Needs to be commutative; i.e., I swap
+// (commute) inputs to AddNodes willy-nilly so the hash function must return
+// the same value in the presence of edge swapping.
+uint AddNode::hash() const {
+ return (uintptr_t)in(1) + (uintptr_t)in(2) + Opcode();
+}
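
A minimal standalone C++ sketch (not part of the patch) of why this recipe tolerates edge swapping: the hash is a plain sum of the two input identities plus the opcode, and addition commutes. The names add_hash/opcode here are invented stand-ins for illustration.

#include <cassert>
#include <cstdint>

// Sum of the two input "addresses" plus the opcode -- addition commutes,
// so swapping the inputs cannot change the hash.
static uintptr_t add_hash(const void* in1, const void* in2, int opcode) {
  return (uintptr_t)in1 + (uintptr_t)in2 + (uintptr_t)opcode;
}

int main() {
  int a, b;
  assert(add_hash(&a, &b, 42) == add_hash(&b, &a, 42)); // edge swap is invisible
  return 0;
}
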
+
+//------------------------------Identity---------------------------------------
+// If either input is a constant 0, return the other input.
+Node *AddNode::Identity( PhaseTransform *phase ) {
+ const Type *zero = add_id(); // The additive identity
+ if( phase->type( in(1) )->higher_equal( zero ) ) return in(2);
+ if( phase->type( in(2) )->higher_equal( zero ) ) return in(1);
+ return this;
+}
+
+//------------------------------commute----------------------------------------
+// Commute operands to move loads and constants to the right.
+static bool commute( Node *add, int con_left, int con_right ) {
+ Node *in1 = add->in(1);
+ Node *in2 = add->in(2);
+
+ // Convert "1+x" into "x+1".
+ // Right is a constant; leave it
+ if( con_right ) return false;
+ // Left is a constant; move it right.
+ if( con_left ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ // Convert "Load+x" into "x+Load".
+ // Now check for loads
+ if( in2->is_Load() ) return false;
+ // Left is a Load and Right is not; move it right.
+ if( in1->is_Load() ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ PhiNode *phi;
+ // Check for tight loop increments: Loop-phi of Add of loop-phi
+ if( in1->is_Phi() && (phi = in1->as_Phi()) && !phi->is_copy() && phi->region()->is_Loop() && phi->in(2)==add)
+ return false;
+ if( in2->is_Phi() && (phi = in2->as_Phi()) && !phi->is_copy() && phi->region()->is_Loop() && phi->in(2)==add){
+ add->swap_edges(1, 2);
+ return true;
+ }
+
+ // Otherwise, sort inputs (commutativity) to help value numbering.
+ if( in1->_idx > in2->_idx ) {
+ add->swap_edges(1, 2);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------Idealize---------------------------------------
+// If we get here, we assume we are associative!
+Node *AddNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ int con_left = t1->singleton();
+ int con_right = t2->singleton();
+
+ // Check for commutative operation desired
+ if( commute(this,con_left,con_right) ) return this;
+
+ AddNode *progress = NULL; // Progress flag
+
+ // Convert "(x+1)+2" into "x+(1+2)". If the right input is a
+ // constant, and the left input is an add of a constant, flatten the
+ // expression tree.
+ Node *add1 = in(1);
+ Node *add2 = in(2);
+ int add1_op = add1->Opcode();
+ int this_op = Opcode();
+ if( con_right && t2 != Type::TOP && // Right input is a constant?
+ add1_op == this_op ) { // Left input is an Add?
+
+    // Type of left input's right input
+ const Type *t12 = phase->type( add1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP ) { // Left input is an add of a constant?
+ // Check for rare case of closed data cycle which can happen inside
+ // unreachable loops. In these cases the computation is undefined.
+#ifdef ASSERT
+ Node *add11 = add1->in(1);
+ int add11_op = add11->Opcode();
+ if( (add1 == add1->in(1))
+ || (add11_op == this_op && add11->in(1) == add1) ) {
+ assert(false, "dead loop in AddNode::Ideal");
+ }
+#endif
+ // The Add of the flattened expression
+ Node *x1 = add1->in(1);
+ Node *x2 = phase->makecon( add1->as_Add()->add_ring( t2, t12 ));
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ if( igvn ) {
+ set_req_X(2,x2,igvn);
+ set_req_X(1,x1,igvn);
+ } else {
+ set_req(2,x2);
+ set_req(1,x1);
+ }
+ progress = this; // Made progress
+ add1 = in(1);
+ add1_op = add1->Opcode();
+ }
+ }
+
+ // Convert "(x+1)+y" into "(x+y)+1". Push constants down the expression tree.
+ if( add1_op == this_op && !con_right ) {
+ Node *a12 = add1->in(2);
+ const Type *t12 = phase->type( a12 );
+ if( t12->singleton() && t12 != Type::TOP && (add1 != add1->in(1)) ) {
+ add2 = add1->clone();
+ add2->set_req(2, in(2));
+ add2 = phase->transform(add2);
+ set_req(1, add2);
+ set_req(2, a12);
+ progress = this;
+ add2 = a12;
+ }
+ }
+
+ // Convert "x+(y+1)" into "(x+y)+1". Push constants down the expression tree.
+ int add2_op = add2->Opcode();
+ if( add2_op == this_op && !con_left ) {
+ Node *a22 = add2->in(2);
+ const Type *t22 = phase->type( a22 );
+ if( t22->singleton() && t22 != Type::TOP && (add2 != add2->in(1)) ) {
+ Node *addx = add2->clone();
+ addx->set_req(1, in(1));
+ addx->set_req(2, add2->in(1));
+ addx = phase->transform(addx);
+ set_req(1, addx);
+ set_req(2, a22);
+ progress = this;
+ }
+ }
+
+ return progress;
+}
+
+//------------------------------Value-----------------------------------------
+// An add node sums its two inputs.  If one input is an RSD, we must mix in
+// the other input's symbols.
+const Type *AddNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Check for an addition involving the additive identity
+ const Type *tadd = add_of_identity( t1, t2 );
+ if( tadd ) return tadd;
+
+ return add_ring(t1,t2); // Local flavor of type addition
+}
+
+//---------------------------add_of_identity-----------------------------------
+// Check for addition of the identity
+const Type *AddNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ const Type *zero = add_id(); // The additive identity
+ if( t1->higher_equal( zero ) ) return t2;
+ if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *AddINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ int op1 = in(1)->Opcode();
+ int op2 = in(2)->Opcode();
+ // Fold (con1-x)+con2 into (con1+con2)-x
+ if( op1 == Op_SubI ) {
+ const Type *t_sub1 = phase->type( in(1)->in(1) );
+ const Type *t_2 = phase->type( in(2) );
+ if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
+ return new (phase->C, 3) SubINode(phase->makecon( add_ring( t_sub1, t_2 ) ),
+ in(1)->in(2) );
+ // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
+ if( op2 == Op_SubI ) {
+ // Check for dead cycle: d = (a-b)+(c-d)
+ assert( in(1)->in(2) != this && in(2)->in(2) != this,
+ "dead loop in AddINode::Ideal" );
+ Node *sub = new (phase->C, 3) SubINode(NULL, NULL);
+ sub->init_req(1, phase->transform(new (phase->C, 3) AddINode(in(1)->in(1), in(2)->in(1) ) ));
+ sub->init_req(2, phase->transform(new (phase->C, 3) AddINode(in(1)->in(2), in(2)->in(2) ) ));
+ return sub;
+ }
+ }
+
+ // Convert "x+(0-y)" into "(x-y)"
+ if( op2 == Op_SubI && phase->type(in(2)->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) SubINode(in(1), in(2)->in(2) );
+
+ // Convert "(0-y)+x" into "(x-y)"
+ if( op1 == Op_SubI && phase->type(in(1)->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) SubINode( in(2), in(1)->in(2) );
+
+ // Convert (x>>>z)+y into (x+(y<<z))>>>z for small constant z and y.
+ // Helps with array allocation math constant folding
+ // See 4790063:
+ // Unrestricted transformation is unsafe for some runtime values of 'x'
+ // ( x == 0, z == 1, y == -1 ) fails
+ // ( x == -5, z == 1, y == 1 ) fails
+ // Transform works for small z and small negative y when the addition
+ // (x + (y << z)) does not cross zero.
+ // Implement support for negative y and (x >= -(y << z))
+ // Have not observed cases where type information exists to support
+ // positive y and (x <= -(y << z))
+ if( op1 == Op_URShiftI && op2 == Op_ConI &&
+ in(1)->in(2)->Opcode() == Op_ConI ) {
+ jint z = phase->type( in(1)->in(2) )->is_int()->get_con() & 0x1f; // only least significant 5 bits matter
+ jint y = phase->type( in(2) )->is_int()->get_con();
+
+ if( z < 5 && -5 < y && y < 0 ) {
+ const Type *t_in11 = phase->type(in(1)->in(1));
+ if( t_in11 != Type::TOP && (t_in11->is_int()->_lo >= -(y << z)) ) {
+ Node *a = phase->transform( new (phase->C, 3) AddINode( in(1)->in(1), phase->intcon(y<<z) ) );
+ return new (phase->C, 3) URShiftINode( a, in(1)->in(2) );
+ }
+ }
+ }
+
+ return AddNode::Ideal(phase, can_reshape);
+}
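
A standalone C++ sketch (separate from the patch) of the safety argument in the comment above: the unrestricted rewrite (x>>>z)+y ==> (x+(y<<z))>>>z is wrong in general, but agrees when z is small, y is a small negative constant, and x >= -(y<<z). The helpers urshift/lhs/rhs are invented for the sketch; urshift models Java's >>> on 32-bit ints.

#include <cstdint>
#include <cstdio>

static int32_t urshift(int32_t v, int z) {          // Java's >>> for jint
  return (int32_t)((uint32_t)v >> (z & 0x1f));
}
static int32_t lhs(int32_t x, int32_t y, int z) { return urshift(x, z) + y; }
static int32_t rhs(int32_t x, int32_t y, int z) { return urshift(x + y * (1 << z), z); }

int main() {
  // Unrestricted rewrite is unsafe: x==0, z==1, y==-1 gives -1 vs 2147483647.
  std::printf("%d vs %d\n", lhs(0, -1, 1), rhs(0, -1, 1));
  // Guarded case: z==1, y==-1, and x >= -(y<<z) == 2 -- the two shapes agree.
  for (int32_t x = 2; x < 1000; ++x)
    if (lhs(x, -1, 1) != rhs(x, -1, 1)) { std::printf("mismatch\n"); return 1; }
  std::printf("guarded range agrees\n");
  return 0;
}
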
+
+
+//------------------------------Identity---------------------------------------
+// Fold (x-y)+y OR y+(x-y) into x
+Node *AddINode::Identity( PhaseTransform *phase ) {
+ if( in(1)->Opcode() == Op_SubI && phase->eqv(in(1)->in(2),in(2)) ) {
+ return in(1)->in(1);
+ }
+ else if( in(2)->Opcode() == Op_SubI && phase->eqv(in(2)->in(2),in(1)) ) {
+ return in(2)->in(1);
+ }
+ return AddNode::Identity(phase);
+}
+
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs. Guaranteed never
+// to be passed a TOP or BOTTOM type, these are filtered out by
+// pre-check.
+const Type *AddINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+ int lo = r0->_lo + r1->_lo;
+ int hi = r0->_hi + r1->_hi;
+ if( !(r0->is_con() && r1->is_con()) ) {
+ // Not both constants, compute approximate result
+ if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
+ lo = min_jint; hi = max_jint; // Underflow on the low side
+ }
+ if( (~(r0->_hi | r1->_hi)) < 0 && hi < 0 ) {
+ lo = min_jint; hi = max_jint; // Overflow on the high side
+ }
+ if( lo > hi ) { // Handle overflow
+ lo = min_jint; hi = max_jint;
+ }
+ } else {
+ // both constants, compute precise result using 'lo' and 'hi'
+ // Semantics define overflow and underflow for integer addition
+ // as expected. In particular: 0x80000000 + 0x80000000 --> 0x0
+ }
+ return TypeInt::make( lo, hi, MAX2(r0->_widen,r1->_widen) );
+}
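
A standalone C++ sketch (separate from the patch) of the non-constant branch above: add the interval bounds and, if the sum could wrap a 32-bit int, give up and widen to all of int. The real code detects the wrap from the sign bits of lo/hi and keeps exact wrapped results in the both-constant case; this sketch uses 64-bit intermediates instead, and Range/add_ranges are invented names.

#include <cstdint>
#include <cstdio>

struct Range { int32_t lo, hi; };                  // stands in for TypeInt's [_lo,_hi]

static Range add_ranges(Range a, Range b) {
  int64_t lo = (int64_t)a.lo + b.lo;               // widen, then check the bounds
  int64_t hi = (int64_t)a.hi + b.hi;
  if (lo < INT32_MIN || hi > INT32_MAX)            // any possible wrap ...
    return Range{INT32_MIN, INT32_MAX};            // ... falls back to all of int
  return Range{(int32_t)lo, (int32_t)hi};
}

int main() {
  Range s = add_ranges(Range{10, 20}, Range{-5, 5});
  std::printf("[%d,%d]\n", (int)s.lo, (int)s.hi);  // [5,25]
  Range t = add_ranges(Range{INT32_MAX - 1, INT32_MAX}, Range{1, 2});
  std::printf("[%d,%d]\n", (int)t.lo, (int)t.hi);  // widened to all of int
  return 0;
}
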
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *AddLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ int op1 = in(1)->Opcode();
+ int op2 = in(2)->Opcode();
+ // Fold (con1-x)+con2 into (con1+con2)-x
+ if( op1 == Op_SubL ) {
+ const Type *t_sub1 = phase->type( in(1)->in(1) );
+ const Type *t_2 = phase->type( in(2) );
+ if( t_sub1->singleton() && t_2->singleton() && t_sub1 != Type::TOP && t_2 != Type::TOP )
+ return new (phase->C, 3) SubLNode(phase->makecon( add_ring( t_sub1, t_2 ) ),
+ in(1)->in(2) );
+ // Convert "(a-b)+(c-d)" into "(a+c)-(b+d)"
+ if( op2 == Op_SubL ) {
+ // Check for dead cycle: d = (a-b)+(c-d)
+ assert( in(1)->in(2) != this && in(2)->in(2) != this,
+ "dead loop in AddLNode::Ideal" );
+ Node *sub = new (phase->C, 3) SubLNode(NULL, NULL);
+ sub->init_req(1, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(1), in(2)->in(1) ) ));
+ sub->init_req(2, phase->transform(new (phase->C, 3) AddLNode(in(1)->in(2), in(2)->in(2) ) ));
+ return sub;
+ }
+ }
+
+ // Convert "x+(0-y)" into "(x-y)"
+ if( op2 == Op_SubL && phase->type(in(2)->in(1)) == TypeLong::ZERO )
+ return new (phase->C, 3) SubLNode(in(1), in(2)->in(2) );
+
+ // Convert "X+X+X+X+X...+X+Y" into "k*X+Y" or really convert "X+(X+Y)"
+ // into "(X<<1)+Y" and let shift-folding happen.
+ if( op2 == Op_AddL &&
+ in(2)->in(1) == in(1) &&
+ op1 != Op_ConL &&
+ 0 ) {
+ Node *shift = phase->transform(new (phase->C, 3) LShiftLNode(in(1),phase->intcon(1)));
+ return new (phase->C, 3) AddLNode(shift,in(2)->in(2));
+ }
+
+ return AddNode::Ideal(phase, can_reshape);
+}
+
+
+//------------------------------Identity---------------------------------------
+// Fold (x-y)+y OR y+(x-y) into x
+Node *AddLNode::Identity( PhaseTransform *phase ) {
+ if( in(1)->Opcode() == Op_SubL && phase->eqv(in(1)->in(2),in(2)) ) {
+ return in(1)->in(1);
+ }
+ else if( in(2)->Opcode() == Op_SubL && phase->eqv(in(2)->in(2),in(1)) ) {
+ return in(2)->in(1);
+ }
+ return AddNode::Identity(phase);
+}
+
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs. Guaranteed never
+// to be passed a TOP or BOTTOM type, these are filtered out by
+// pre-check.
+const Type *AddLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+ jlong lo = r0->_lo + r1->_lo;
+ jlong hi = r0->_hi + r1->_hi;
+ if( !(r0->is_con() && r1->is_con()) ) {
+ // Not both constants, compute approximate result
+ if( (r0->_lo & r1->_lo) < 0 && lo >= 0 ) {
+      lo = min_jlong; hi = max_jlong; // Underflow on the low side
+ }
+ if( (~(r0->_hi | r1->_hi)) < 0 && hi < 0 ) {
+ lo = min_jlong; hi = max_jlong; // Overflow on the high side
+ }
+ if( lo > hi ) { // Handle overflow
+ lo = min_jlong; hi = max_jlong;
+ }
+ } else {
+ // both constants, compute precise result using 'lo' and 'hi'
+ // Semantics define overflow and underflow for integer addition
+    // as expected.  In particular:
+    // 0x8000000000000000 + 0x8000000000000000 --> 0x0
+ }
+ return TypeLong::make( lo, hi, MAX2(r0->_widen,r1->_widen) );
+}
+
+
+//=============================================================================
+//------------------------------add_of_identity--------------------------------
+// Check for addition of the identity
+const Type *AddFNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ // x ADD 0 should return x unless 'x' is a -zero
+ //
+ // const Type *zero = add_id(); // The additive identity
+ // jfloat f1 = t1->getf();
+ // jfloat f2 = t2->getf();
+ //
+ // if( t1->higher_equal( zero ) ) return t2;
+ // if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
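
A standalone C++ sketch (separate from the patch) of the negative-zero case that keeps this method from treating +0.0f as an identity: under IEEE 754, (-0.0f) + 0.0f is +0.0f, so folding x+0 to x would preserve a sign bit that the real addition clears.

#include <cmath>
#include <cstdio>

int main() {
  float x = -0.0f;
  float folded = x;               // what "x + 0 ==> x" would produce
  float actual = x + 0.0f;        // what the add really produces: +0.0f
  std::printf("signbit(folded)=%d signbit(actual)=%d\n",
              (int)std::signbit(folded), (int)std::signbit(actual)); // 1 vs 0
  return 0;
}
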
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AddFNode::add_ring( const Type *t0, const Type *t1 ) const {
+ // We must be adding 2 float constants.
+ return TypeF::make( t0->getf() + t1->getf() );
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AddFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ return AddNode::Ideal(phase, can_reshape); // commutative and associative transforms
+ }
+
+ // Floating point additions are not associative because of boundary conditions (infinity)
+ return commute(this,
+ phase->type( in(1) )->singleton(),
+ phase->type( in(2) )->singleton() ) ? this : NULL;
+}
+
+
+//=============================================================================
+//------------------------------add_of_identity--------------------------------
+// Check for addition of the identity
+const Type *AddDNode::add_of_identity( const Type *t1, const Type *t2 ) const {
+ // x ADD 0 should return x unless 'x' is a -zero
+ //
+ // const Type *zero = add_id(); // The additive identity
+  // jdouble d1 = t1->getd();
+  // jdouble d2 = t2->getd();
+ //
+ // if( t1->higher_equal( zero ) ) return t2;
+ // if( t2->higher_equal( zero ) ) return t1;
+
+ return NULL;
+}
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AddDNode::add_ring( const Type *t0, const Type *t1 ) const {
+ // We must be adding 2 double constants.
+ return TypeD::make( t0->getd() + t1->getd() );
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AddDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ return AddNode::Ideal(phase, can_reshape); // commutative and associative transforms
+ }
+
+ // Floating point additions are not associative because of boundary conditions (infinity)
+ return commute(this,
+ phase->type( in(1) )->singleton(),
+ phase->type( in(2) )->singleton() ) ? this : NULL;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If one input is a constant 0, return the other input.
+Node *AddPNode::Identity( PhaseTransform *phase ) {
+ return ( phase->type( in(Offset) )->higher_equal( TypeX_ZERO ) ) ? in(Address) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+Node *AddPNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Bail out if dead inputs
+ if( phase->type( in(Address) ) == Type::TOP ) return NULL;
+
+ // If the left input is an add of a constant, flatten the expression tree.
+ const Node *n = in(Address);
+ if (n->is_AddP() && n->in(Base) == in(Base)) {
+ const AddPNode *addp = n->as_AddP(); // Left input is an AddP
+ assert( !addp->in(Address)->is_AddP() ||
+ addp->in(Address)->as_AddP() != addp,
+ "dead loop in AddPNode::Ideal" );
+ // Type of left input's right input
+ const Type *t = phase->type( addp->in(Offset) );
+ if( t == Type::TOP ) return NULL;
+ const TypeX *t12 = t->is_intptr_t();
+ if( t12->is_con() ) { // Left input is an add of a constant?
+ // If the right input is a constant, combine constants
+ const Type *temp_t2 = phase->type( in(Offset) );
+ if( temp_t2 == Type::TOP ) return NULL;
+ const TypeX *t2 = temp_t2->is_intptr_t();
+ if( t2->is_con() ) {
+ // The Add of the flattened expression
+ set_req(Address, addp->in(Address));
+ set_req(Offset , phase->MakeConX(t2->get_con() + t12->get_con()));
+ return this; // Made progress
+ }
+ // Else move the constant to the right. ((A+con)+B) into ((A+B)+con)
+ set_req(Address, phase->transform(new (phase->C, 4) AddPNode(in(Base),addp->in(Address),in(Offset))));
+ set_req(Offset , addp->in(Offset));
+ return this;
+ }
+ }
+
+ // Raw pointers?
+ if( in(Base)->bottom_type() == Type::TOP ) {
+ // If this is a NULL+long form (from unsafe accesses), switch to a rawptr.
+ if (phase->type(in(Address)) == TypePtr::NULL_PTR) {
+ Node* offset = in(Offset);
+ return new (phase->C, 2) CastX2PNode(offset);
+ }
+ }
+
+ // If the right is an add of a constant, push the offset down.
+ // Convert: (ptr + (offset+con)) into (ptr+offset)+con.
+ // The idea is to merge array_base+scaled_index groups together,
+ // and only have different constant offsets from the same base.
+ const Node *add = in(Offset);
+ if( add->Opcode() == Op_AddX && add->in(1) != add ) {
+ const Type *t22 = phase->type( add->in(2) );
+ if( t22->singleton() && (t22 != Type::TOP) ) { // Right input is an add of a constant?
+ set_req(Address, phase->transform(new (phase->C, 4) AddPNode(in(Base),in(Address),add->in(1))));
+ set_req(Offset, add->in(2));
+ return this; // Made progress
+ }
+ }
+
+ return NULL; // No progress
+}
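
A standalone C++ sketch (separate from the patch) of the re-association performed above: regrouping (ptr + (offset + con)) as ((ptr + offset) + con) never changes the final address, it only pushes the constant to the outside so addresses sharing the same base+index part differ only by a trailing constant.

#include <cstdio>

int main() {
  char buf[64];
  char* base = buf;
  long  off  = 5;                    // e.g. a scaled array index
  long  con  = 3;                    // e.g. a header/field offset
  char* a = base + (off + con);      // (ptr + (offset + con))
  char* b = (base + off) + con;      // ((ptr + offset) + con)
  std::printf("%s\n", (a == b) ? "same address" : "different");
  return 0;
}
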
+
+//------------------------------bottom_type------------------------------------
+// Bottom-type is the pointer-type with unknown offset.
+const Type *AddPNode::bottom_type() const {
+ if (in(Address) == NULL) return TypePtr::BOTTOM;
+ const TypePtr *tp = in(Address)->bottom_type()->isa_ptr();
+ if( !tp ) return Type::TOP; // TOP input means TOP output
+ assert( in(Offset)->Opcode() != Op_ConP, "" );
+ const Type *t = in(Offset)->bottom_type();
+ if( t == Type::TOP )
+ return tp->add_offset(Type::OffsetTop);
+ const TypeX *tx = t->is_intptr_t();
+ intptr_t txoffset = Type::OffsetBot;
+  if (tx->is_con()) {   // Offset is a constant?
+ txoffset = tx->get_con();
+ if (txoffset != (int)txoffset)
+ txoffset = Type::OffsetBot; // oops: add_offset will choke on it
+ }
+ return tp->add_offset(txoffset);
+}
+
+//------------------------------Value------------------------------------------
+const Type *AddPNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(Address) );
+ const Type *t2 = phase->type( in(Offset) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is a pointer
+ const TypePtr *p1 = t1->isa_ptr();
+ // Right input is an int
+ const TypeX *p2 = t2->is_intptr_t();
+ // Add 'em
+ intptr_t p2offset = Type::OffsetBot;
+  if (p2->is_con()) {   // Offset is a constant?
+ p2offset = p2->get_con();
+ if (p2offset != (int)p2offset)
+ p2offset = Type::OffsetBot; // oops: add_offset will choke on it
+ }
+ return p1->add_offset(p2offset);
+}
+
+//------------------------Ideal_base_and_offset--------------------------------
+// Split an oop pointer into a base and offset.
+// (The offset might be Type::OffsetBot in the case of an array.)
+// Return the base, or NULL if failure.
+Node* AddPNode::Ideal_base_and_offset(Node* ptr, PhaseTransform* phase,
+ // second return value:
+ intptr_t& offset) {
+ if (ptr->is_AddP()) {
+ Node* base = ptr->in(AddPNode::Base);
+ Node* addr = ptr->in(AddPNode::Address);
+ Node* offs = ptr->in(AddPNode::Offset);
+ if (base == addr || base->is_top()) {
+ offset = phase->find_intptr_t_con(offs, Type::OffsetBot);
+ if (offset != Type::OffsetBot) {
+ return addr;
+ }
+ }
+ }
+ offset = Type::OffsetBot;
+ return NULL;
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Do not match base pointer edge
+uint AddPNode::match_edge(uint idx) const {
+ return idx > Base;
+}
+
+//---------------------------mach_bottom_type----------------------------------
+// Utility function for use by ADLC. Implements bottom_type for matched AddP.
+const Type *AddPNode::mach_bottom_type( const MachNode* n) {
+ Node* base = n->in(Base);
+ const Type *t = base->bottom_type();
+ if ( t == Type::TOP ) {
+ // an untyped pointer
+ return TypeRawPtr::BOTTOM;
+ }
+ const TypePtr* tp = t->isa_oopptr();
+ if ( tp == NULL ) return t;
+ if ( tp->_offset == TypePtr::OffsetBot ) return tp;
+
+ // We must carefully add up the various offsets...
+ intptr_t offset = 0;
+ const TypePtr* tptr = NULL;
+
+ uint numopnds = n->num_opnds();
+ uint index = n->oper_input_base();
+ for ( uint i = 1; i < numopnds; i++ ) {
+ MachOper *opnd = n->_opnds[i];
+ // Check for any interesting operand info.
+ // In particular, check for both memory and non-memory operands.
+ // %%%%% Clean this up: use xadd_offset
+ int con = opnd->constant();
+ if ( con == TypePtr::OffsetBot ) goto bottom_out;
+ offset += con;
+ con = opnd->constant_disp();
+ if ( con == TypePtr::OffsetBot ) goto bottom_out;
+ offset += con;
+ if( opnd->scale() != 0 ) goto bottom_out;
+
+ // Check each operand input edge. Find the 1 allowed pointer
+ // edge. Other edges must be index edges; track exact constant
+ // inputs and otherwise assume the worst.
+ for ( uint j = opnd->num_edges(); j > 0; j-- ) {
+ Node* edge = n->in(index++);
+ const Type* et = edge->bottom_type();
+ const TypeX* eti = et->isa_intptr_t();
+ if ( eti == NULL ) {
+ // there must be one pointer among the operands
+ guarantee(tptr == NULL, "must be only one pointer operand");
+ tptr = et->isa_oopptr();
+ guarantee(tptr != NULL, "non-int operand must be pointer");
+ continue;
+ }
+ if ( eti->_hi != eti->_lo ) goto bottom_out;
+ offset += eti->_lo;
+ }
+ }
+ guarantee(tptr != NULL, "must be exactly one pointer operand");
+ return tptr->add_offset(offset);
+
+ bottom_out:
+ return tp->add_offset(TypePtr::OffsetBot);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *OrINode::Identity( PhaseTransform *phase ) {
+ // x | x => x
+ if (phase->eqv(in(1), in(2))) {
+ return in(1);
+ }
+
+ return AddNode::Identity(phase);
+}
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs IN THE CURRENT RING. For
+// the logical operations the ring's ADD is really a logical OR function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *OrINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // If both args are bool, can figure out better types
+ if ( r0 == TypeInt::BOOL ) {
+ if ( r1 == TypeInt::ONE) {
+ return TypeInt::ONE;
+ } else if ( r1 == TypeInt::BOOL ) {
+ return TypeInt::BOOL;
+ }
+ } else if ( r0 == TypeInt::ONE ) {
+ if ( r1 == TypeInt::BOOL ) {
+ return TypeInt::ONE;
+ }
+ }
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeInt::INT; // Any integer, but still no symbols.
+
+ // Otherwise just OR them bits.
+ return TypeInt::make( r0->get_con() | r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *OrLNode::Identity( PhaseTransform *phase ) {
+ // x | x => x
+ if (phase->eqv(in(1), in(2))) {
+ return in(1);
+ }
+
+ return AddNode::Identity(phase);
+}
+
+//------------------------------add_ring---------------------------------------
+const Type *OrLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeLong::LONG; // Any integer, but still no symbols.
+
+ // Otherwise just OR them bits.
+ return TypeLong::make( r0->get_con() | r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs IN THE CURRENT RING. For
+// the logical operations the ring's ADD is really a logical OR function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *XorINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Complementing a boolean?
+ if( r0 == TypeInt::BOOL && ( r1 == TypeInt::ONE
+ || r1 == TypeInt::BOOL))
+ return TypeInt::BOOL;
+
+ if( !r0->is_con() || !r1->is_con() ) // Not constants
+ return TypeInt::INT; // Any integer, but still no symbols.
+
+ // Otherwise just XOR them bits.
+ return TypeInt::make( r0->get_con() ^ r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+const Type *XorLNode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // If either input is not a constant, just return all integers.
+ if( !r0->is_con() || !r1->is_con() )
+ return TypeLong::LONG; // Any integer, but still no symbols.
+
+  // Otherwise just XOR them bits.
+ return TypeLong::make( r0->get_con() ^ r1->get_con() );
+}
+
+//=============================================================================
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+const Type *MaxINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Otherwise just MAX them bits.
+ return TypeInt::make( MAX2(r0->_lo,r1->_lo), MAX2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+// MINs show up in range-check loop limit calculations. Look for
+// "MIN2(x+c0,MIN2(y,x+c1))". Pick the smaller constant: "MIN2(x+c0,y)"
+Node *MinINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *progress = NULL;
+ // Force a right-spline graph
+ Node *l = in(1);
+ Node *r = in(2);
+ // Transform MinI1( MinI2(a,b), c) into MinI1( a, MinI2(b,c) )
+ // to force a right-spline graph for the rest of MinINode::Ideal().
+ if( l->Opcode() == Op_MinI ) {
+ assert( l != l->in(1), "dead loop in MinINode::Ideal" );
+ r = phase->transform(new (phase->C, 3) MinINode(l->in(2),r));
+ l = l->in(1);
+ set_req(1, l);
+ set_req(2, r);
+ return this;
+ }
+
+ // Get left input & constant
+ Node *x = l;
+ int x_off = 0;
+ if( x->Opcode() == Op_AddI && // Check for "x+c0" and collect constant
+ x->in(2)->is_Con() ) {
+ const Type *t = x->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ x_off = t->is_int()->get_con();
+ x = x->in(1);
+ }
+
+ // Scan a right-spline-tree for MINs
+ Node *y = r;
+ int y_off = 0;
+ // Check final part of MIN tree
+ if( y->Opcode() == Op_AddI && // Check for "y+c1" and collect constant
+ y->in(2)->is_Con() ) {
+ const Type *t = y->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ y_off = t->is_int()->get_con();
+ y = y->in(1);
+ }
+ if( x->_idx > y->_idx && r->Opcode() != Op_MinI ) {
+ swap_edges(1, 2);
+ return this;
+ }
+
+
+ if( r->Opcode() == Op_MinI ) {
+ assert( r != r->in(2), "dead loop in MinINode::Ideal" );
+ y = r->in(1);
+ // Check final part of MIN tree
+ if( y->Opcode() == Op_AddI &&// Check for "y+c1" and collect constant
+ y->in(2)->is_Con() ) {
+ const Type *t = y->in(2)->bottom_type();
+ if( t == Type::TOP ) return NULL; // No progress
+ y_off = t->is_int()->get_con();
+ y = y->in(1);
+ }
+
+ if( x->_idx > y->_idx )
+ return new (phase->C, 3) MinINode(r->in(1),phase->transform(new (phase->C, 3) MinINode(l,r->in(2))));
+
+ // See if covers: MIN2(x+c0,MIN2(y+c1,z))
+ if( !phase->eqv(x,y) ) return NULL;
+    // If (y == x) transform MIN2(x+c0, MIN2(x+c1,z)) into
+    // MIN2(x + MIN2(c0,c1), z).
+ return new (phase->C, 3) MinINode(phase->transform(new (phase->C, 3) AddINode(x,phase->intcon(MIN2(x_off,y_off)))),r->in(2));
+ } else {
+ // See if covers: MIN2(x+c0,y+c1)
+ if( !phase->eqv(x,y) ) return NULL;
+    // If (y == x) transform MIN2(x+c0,x+c1) into x + MIN2(c0,c1).
+ return new (phase->C, 3) AddINode(x,phase->intcon(MIN2(x_off,y_off)));
+ }
+
+}
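
A standalone C++ sketch (separate from the patch) of the identity behind the last two cases above: with a shared x (and away from int overflow at the extremes), MIN2(x+c0, MIN2(x+c1, z)) equals MIN2(x + MIN2(c0,c1), z), which is the shape the returned node computes. A quick spot-check over a small range:

#include <algorithm>
#include <cstdio>

int main() {
  const int c0 = 7, c1 = -3;                       // two sample constants
  for (int x = -50; x <= 50; ++x)
    for (int z = -60; z <= 60; ++z) {
      int full    = std::min(x + c0, std::min(x + c1, z));
      int reduced = std::min(x + std::min(c0, c1), z);
      if (full != reduced) { std::printf("mismatch\n"); return 1; }
    }
  std::printf("identity holds on the sampled range\n");
  return 0;
}
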
+
+//------------------------------add_ring---------------------------------------
+// Supplied function returns the sum of the inputs.
+const Type *MinINode::add_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Otherwise just MIN them bits.
+ return TypeInt::make( MIN2(r0->_lo,r1->_lo), MIN2(r0->_hi,r1->_hi), MAX2(r0->_widen,r1->_widen) );
+}
diff --git a/src/share/vm/opto/addnode.hpp b/src/share/vm/opto/addnode.hpp
new file mode 100644
index 000000000..5170f50e1
--- /dev/null
+++ b/src/share/vm/opto/addnode.hpp
@@ -0,0 +1,239 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class PhaseTransform;
+
+//------------------------------AddNode----------------------------------------
+// Classic Add functionality. This covers all the usual 'add' behaviors for
+// an algebraic ring. Add-integer, add-float, add-double, and binary-or are
+// all inherited from this class. The various identity values are supplied
+// by virtual functions.
+class AddNode : public Node {
+ virtual uint hash() const;
+public:
+ AddNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
+ init_class_id(Class_Add);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "add of zero" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // We also canonicalize the Node, moving constants to the right input,
+ // and flatten expressions (so that 1+x+2 becomes x+3).
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Check if this addition involves the additive identity
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+
+ // Supplied function returns the sum of the inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ virtual const Type *add_ring( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the additive identity type
+ virtual const Type *add_id() const = 0;
+
+};
+
+//------------------------------AddINode---------------------------------------
+// Add 2 integers
+class AddINode : public AddNode {
+public:
+ AddINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AddLNode---------------------------------------
+// Add 2 longs
+class AddLNode : public AddNode {
+public:
+ AddLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------AddFNode---------------------------------------
+// Add 2 floats
+class AddFNode : public AddNode {
+public:
+ AddFNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeF::ZERO; }
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------AddDNode---------------------------------------
+// Add 2 doubles
+class AddDNode : public AddNode {
+public:
+ AddDNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *add_of_identity( const Type *t1, const Type *t2 ) const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeD::ZERO; }
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------AddPNode---------------------------------------
+// Add pointer plus integer to get pointer. NOT commutative, really.
+// So not really an AddNode. Lives here, because people associate it with
+// an add.
+class AddPNode : public Node {
+public:
+ enum { Control, // When is it safe to do this add?
+ Base, // Base oop, for GC purposes
+ Address, // Actually address, derived from base
+ Offset } ; // Offset added to address
+ AddPNode( Node *base, Node *ptr, Node *off ) : Node(0,base,ptr,off) {
+ init_class_id(Class_AddP);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ Node *base_node() { assert( req() > Base, "Missing base"); return in(Base); }
+ static Node* Ideal_base_and_offset(Node* ptr, PhaseTransform* phase,
+ // second return value:
+ intptr_t& offset);
+ // Do not match base-ptr edge
+ virtual uint match_edge(uint idx) const;
+ static const Type *mach_bottom_type(const MachNode* n); // used by ad_<arch>.hpp
+};
+
+//------------------------------OrINode----------------------------------------
+// Logically OR 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class OrINode : public AddNode {
+public:
+ OrINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------OrLNode----------------------------------------
+// Logically OR 2 longs. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class OrLNode : public AddNode {
+public:
+ OrLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------XorINode---------------------------------------
+// XOR'ing 2 integers
+class XorINode : public AddNode {
+public:
+ XorINode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::ZERO; }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------XorLNode---------------------------------------
+// XOR'ing 2 longs
+class XorLNode : public AddNode {
+public:
+ XorLNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeLong::ZERO; }
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------MaxNode----------------------------------------
+// Max (or min) of 2 values. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring. Only new thing is that we allow
+// 2 equal inputs to be equal.
+class MaxNode : public AddNode {
+public:
+ MaxNode( Node *in1, Node *in2 ) : AddNode(in1,in2) {}
+ virtual int Opcode() const = 0;
+};
+
+//------------------------------MaxINode---------------------------------------
+// Maximum of 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class MaxINode : public MaxNode {
+public:
+ MaxINode( Node *in1, Node *in2 ) : MaxNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::make(min_jint); }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------MinINode---------------------------------------
+// MINimum of 2 integers. Included with the ADD nodes because it inherits
+// all the behavior of addition on a ring.
+class MinINode : public MaxNode {
+public:
+ MinINode( Node *in1, Node *in2 ) : MaxNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *add_ring( const Type *, const Type * ) const;
+ virtual const Type *add_id() const { return TypeInt::make(max_jint); }
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
diff --git a/src/share/vm/opto/adlcVMDeps.hpp b/src/share/vm/opto/adlcVMDeps.hpp
new file mode 100644
index 000000000..7d4f14ed9
--- /dev/null
+++ b/src/share/vm/opto/adlcVMDeps.hpp
@@ -0,0 +1,44 @@
+/*
+ * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Declare commonly known constants and data structures shared between the
+// ADLC and the VM
+//
+
+class AdlcVMDeps : public AllStatic {
+ public:
+ // Mirror of TypeFunc types
+ enum { Control, I_O, Memory, FramePtr, ReturnAdr, Parms };
+
+ enum Cisc_Status { Not_cisc_spillable = -1 };
+
+ // Mirror of OptoReg::Name names
+ enum Name {
+ Physical = 0 // Start of physical regs
+ };
+
+ // relocInfo
+ static const char* oop_reloc_type() { return "relocInfo::oop_type"; }
+ static const char* none_reloc_type() { return "relocInfo::none"; }
+};
diff --git a/src/share/vm/opto/block.cpp b/src/share/vm/opto/block.cpp
new file mode 100644
index 000000000..c6b94a45a
--- /dev/null
+++ b/src/share/vm/opto/block.cpp
@@ -0,0 +1,952 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_block.cpp.incl"
+
+
+//-----------------------------------------------------------------------------
+void Block_Array::grow( uint i ) {
+ assert(i >= Max(), "must be an overflow");
+ debug_only(_limit = i+1);
+ if( i < _size ) return;
+ if( !_size ) {
+ _size = 1;
+ _blocks = (Block**)_arena->Amalloc( _size * sizeof(Block*) );
+ _blocks[0] = NULL;
+ }
+ uint old = _size;
+ while( i >= _size ) _size <<= 1; // Double to fit
+ _blocks = (Block**)_arena->Arealloc( _blocks, old*sizeof(Block*),_size*sizeof(Block*));
+ Copy::zero_to_bytes( &_blocks[old], (_size-old)*sizeof(Block*) );
+}
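
A standalone C++ sketch (separate from the patch) of the grow-by-doubling pattern used above, with the C runtime standing in for the arena allocator (Amalloc/Arealloc) and memset for Copy::zero_to_bytes. PtrArray and its members are invented names, and allocation-failure checks are omitted for brevity.

#include <cstdlib>
#include <cstring>

struct PtrArray {
  void**   data = nullptr;
  unsigned size = 0;

  void grow(unsigned i) {                     // ensure index i is addressable
    if (data == nullptr) {                    // first use: start with one slot
      size = 1;
      data = (void**)std::calloc(1, sizeof(void*));
    }
    if (i < size) return;                     // already big enough
    unsigned old = size;
    while (i >= size) size <<= 1;             // double until index i fits
    data = (void**)std::realloc(data, size * sizeof(void*));
    std::memset(data + old, 0, (size - old) * sizeof(void*)); // zero the new tail
  }
};

int main() {
  PtrArray a;
  a.grow(37);                                 // capacity: 1 -> 2 -> ... -> 64
  return (a.size == 64 && a.data[37] == nullptr) ? 0 : 1;
}
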
+
+//=============================================================================
+void Block_List::remove(uint i) {
+ assert(i < _cnt, "index out of bounds");
+ Copy::conjoint_words_to_lower((HeapWord*)&_blocks[i+1], (HeapWord*)&_blocks[i], ((_cnt-i-1)*sizeof(Block*)));
+ pop(); // shrink list by one block
+}
+
+void Block_List::insert(uint i, Block *b) {
+ push(b); // grow list by one block
+ Copy::conjoint_words_to_higher((HeapWord*)&_blocks[i], (HeapWord*)&_blocks[i+1], ((_cnt-i-1)*sizeof(Block*)));
+ _blocks[i] = b;
+}
+
+
+//=============================================================================
+
+uint Block::code_alignment() {
+ // Check for Root block
+ if( _pre_order == 0 ) return CodeEntryAlignment;
+ // Check for Start block
+ if( _pre_order == 1 ) return InteriorEntryAlignment;
+ // Check for loop alignment
+ Node *h = head();
+ if( h->is_Loop() && h->as_Loop()->is_inner_loop() ) {
+ // Pre- and post-loops have low trip count so do not bother with
+ // NOPs for align loop head. The constants are hidden from tuning
+ // but only because my "divide by 4" heuristic surely gets nearly
+ // all possible gain (a "do not align at all" heuristic has a
+ // chance of getting a really tiny gain).
+ if( h->is_CountedLoop() && (h->as_CountedLoop()->is_pre_loop() ||
+ h->as_CountedLoop()->is_post_loop()) )
+ return (OptoLoopAlignment > 4) ? (OptoLoopAlignment>>2) : 1;
+ // Loops with low backedge frequency should not be aligned.
+ Node *n = h->in(LoopNode::LoopBackControl)->in(0);
+ if( n->is_MachIf() && n->as_MachIf()->_prob < 0.01 ) {
+ return 1; // Loop does not loop, more often than not!
+ }
+ return OptoLoopAlignment; // Otherwise align loop head
+ }
+ return 1; // no particular alignment
+}
+
+//-----------------------------------------------------------------------------
+// Compute the size of the first 'inst_cnt' instructions in this block.
+// Return the number of instructions left to compute if the block has
+// fewer than 'inst_cnt' instructions.
+uint Block::compute_first_inst_size(uint& sum_size, uint inst_cnt,
+ PhaseRegAlloc* ra) {
+ uint last_inst = _nodes.size();
+ for( uint j = 0; j < last_inst && inst_cnt > 0; j++ ) {
+ uint inst_size = _nodes[j]->size(ra);
+ if( inst_size > 0 ) {
+ inst_cnt--;
+ uint sz = sum_size + inst_size;
+ if( sz <= (uint)OptoLoopAlignment ) {
+ // Compute size of instructions which fit into fetch buffer only
+ // since all inst_cnt instructions will not fit even if we align them.
+ sum_size = sz;
+ } else {
+ return 0;
+ }
+ }
+ }
+ return inst_cnt;
+}
+
+//-----------------------------------------------------------------------------
+uint Block::find_node( const Node *n ) const {
+ for( uint i = 0; i < _nodes.size(); i++ ) {
+ if( _nodes[i] == n )
+ return i;
+ }
+ ShouldNotReachHere();
+ return 0;
+}
+
+// Find and remove n from block list
+void Block::find_remove( const Node *n ) {
+ _nodes.remove(find_node(n));
+}
+
+//------------------------------is_Empty---------------------------------------
+// Return empty status of a block. Empty blocks contain only the head, other
+// ideal nodes, and an optional trailing goto.
+int Block::is_Empty() const {
+
+ // Root or start block is not considered empty
+ if (head()->is_Root() || head()->is_Start()) {
+ return not_empty;
+ }
+
+ int success_result = completely_empty;
+ int end_idx = _nodes.size()-1;
+
+ // Check for ending goto
+ if ((end_idx > 0) && (_nodes[end_idx]->is_Goto())) {
+ success_result = empty_with_goto;
+ end_idx--;
+ }
+
+ // Unreachable blocks are considered empty
+ if (num_preds() <= 1) {
+ return success_result;
+ }
+
+  // Ideal nodes are allowable in empty blocks: skip them.  Only MachNodes
+ // turn directly into code, because only MachNodes have non-trivial
+ // emit() functions.
+ while ((end_idx > 0) && !_nodes[end_idx]->is_Mach()) {
+ end_idx--;
+ }
+
+ // No room for any interesting instructions?
+ if (end_idx == 0) {
+ return success_result;
+ }
+
+ return not_empty;
+}
+
+//------------------------------has_uncommon_code------------------------------
+// Return true if the block's code implies that it is likely to be
+// executed infrequently. Check to see if the block ends in a Halt or
+// a low probability call.
+bool Block::has_uncommon_code() const {
+ Node* en = end();
+
+ if (en->is_Goto())
+ en = en->in(0);
+ if (en->is_Catch())
+ en = en->in(0);
+ if (en->is_Proj() && en->in(0)->is_MachCall()) {
+ MachCallNode* call = en->in(0)->as_MachCall();
+ if (call->cnt() != COUNT_UNKNOWN && call->cnt() <= PROB_UNLIKELY_MAG(4)) {
+ // This is true for slow-path stubs like new_{instance,array},
+ // slow_arraycopy, complete_monitor_locking, uncommon_trap.
+ // The magic number corresponds to the probability of an uncommon_trap,
+ // even though it is a count not a probability.
+ return true;
+ }
+ }
+
+ int op = en->is_Mach() ? en->as_Mach()->ideal_Opcode() : en->Opcode();
+ return op == Op_Halt;
+}
+
+//------------------------------is_uncommon------------------------------------
+// True if block is low enough frequency or guarded by a test which
+// mostly does not go here.
+bool Block::is_uncommon( Block_Array &bbs ) const {
+ // Initial blocks must never be moved, so are never uncommon.
+ if (head()->is_Root() || head()->is_Start()) return false;
+
+ // Check for way-low freq
+ if( _freq < BLOCK_FREQUENCY(0.00001f) ) return true;
+
+ // Look for code shape indicating uncommon_trap or slow path
+ if (has_uncommon_code()) return true;
+
+ const float epsilon = 0.05f;
+ const float guard_factor = PROB_UNLIKELY_MAG(4) / (1.f - epsilon);
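+  // With a magnitude-4 probability of 1e-4 and a 5% epsilon, guard_factor
+  // works out to about 1e-4/0.95, i.e. roughly 1/9500.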
+ uint uncommon_preds = 0;
+ uint freq_preds = 0;
+ uint uncommon_for_freq_preds = 0;
+
+ for( uint i=1; i<num_preds(); i++ ) {
+ Block* guard = bbs[pred(i)->_idx];
+ // Check to see if this block follows its guard 1 time out of 10000
+ // or less.
+ //
+ // See list of magnitude-4 unlikely probabilities in cfgnode.hpp which
+ // we intend to be "uncommon", such as slow-path TLE allocation,
+ // predicted call failure, and uncommon trap triggers.
+ //
+ // Use an epsilon value of 5% to allow for variability in frequency
+    // predictions and floating point calculations. The net effect is a
+    // guard-to-block frequency ratio threshold of about 9500 to 1.
+ //
+ // Ignore low-frequency blocks.
+ // The next check is (guard->_freq < 1.e-5 * 9500.).
+ if(guard->_freq*BLOCK_FREQUENCY(guard_factor) < BLOCK_FREQUENCY(0.00001f)) {
+ uncommon_preds++;
+ } else {
+ freq_preds++;
+ if( _freq < guard->_freq * guard_factor ) {
+ uncommon_for_freq_preds++;
+ }
+ }
+ }
+ if( num_preds() > 1 &&
+ // The block is uncommon if all preds are uncommon or
+ (uncommon_preds == (num_preds()-1) ||
+ // it is uncommon for all frequent preds.
+ uncommon_for_freq_preds == freq_preds) ) {
+ return true;
+ }
+ return false;
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void Block::dump_bidx(const Block* orig) const {
+ if (_pre_order) tty->print("B%d",_pre_order);
+ else tty->print("N%d", head()->_idx);
+
+ if (Verbose && orig != this) {
+ // Dump the original block's idx
+ tty->print(" (");
+ orig->dump_bidx(orig);
+ tty->print(")");
+ }
+}
+
+void Block::dump_pred(const Block_Array *bbs, Block* orig) const {
+ if (is_connector()) {
+ for (uint i=1; i<num_preds(); i++) {
+ Block *p = ((*bbs)[pred(i)->_idx]);
+ p->dump_pred(bbs, orig);
+ }
+ } else {
+ dump_bidx(orig);
+ tty->print(" ");
+ }
+}
+
+void Block::dump_head( const Block_Array *bbs ) const {
+ // Print the basic block
+ dump_bidx(this);
+ tty->print(": #\t");
+
+ // Print the incoming CFG edges and the outgoing CFG edges
+ for( uint i=0; i<_num_succs; i++ ) {
+ non_connector_successor(i)->dump_bidx(_succs[i]);
+ tty->print(" ");
+ }
+ tty->print("<- ");
+ if( head()->is_block_start() ) {
+ for (uint i=1; i<num_preds(); i++) {
+ Node *s = pred(i);
+ if (bbs) {
+ Block *p = (*bbs)[s->_idx];
+ p->dump_pred(bbs, p);
+ } else {
+ while (!s->is_block_start())
+ s = s->in(0);
+ tty->print("N%d ", s->_idx );
+ }
+ }
+ } else
+ tty->print("BLOCK HEAD IS JUNK ");
+
+ // Print loop, if any
+ const Block *bhead = this; // Head of self-loop
+ Node *bh = bhead->head();
+ if( bbs && bh->is_Loop() && !head()->is_Root() ) {
+ LoopNode *loop = bh->as_Loop();
+ const Block *bx = (*bbs)[loop->in(LoopNode::LoopBackControl)->_idx];
+ while (bx->is_connector()) {
+ bx = (*bbs)[bx->pred(1)->_idx];
+ }
+ tty->print("\tLoop: B%d-B%d ", bhead->_pre_order, bx->_pre_order);
+ // Dump any loop-specific bits, especially for CountedLoops.
+ loop->dump_spec(tty);
+ }
+ tty->print(" Freq: %g",_freq);
+ if( Verbose || WizardMode ) {
+ tty->print(" IDom: %d/#%d", _idom ? _idom->_pre_order : 0, _dom_depth);
+ tty->print(" RegPressure: %d",_reg_pressure);
+ tty->print(" IHRP Index: %d",_ihrp_index);
+ tty->print(" FRegPressure: %d",_freg_pressure);
+ tty->print(" FHRP Index: %d",_fhrp_index);
+ }
+ tty->print_cr("");
+}
+
+void Block::dump() const { dump(0); }
+
+void Block::dump( const Block_Array *bbs ) const {
+ dump_head(bbs);
+ uint cnt = _nodes.size();
+ for( uint i=0; i<cnt; i++ )
+ _nodes[i]->dump();
+ tty->print("\n");
+}
+#endif
+
+//=============================================================================
+//------------------------------PhaseCFG---------------------------------------
+PhaseCFG::PhaseCFG( Arena *a, RootNode *r, Matcher &m ) :
+ Phase(CFG),
+ _bbs(a),
+ _root(r)
+#ifndef PRODUCT
+ , _trace_opto_pipelining(TraceOptoPipelining || C->method_has_option("TraceOptoPipelining"))
+#endif
+{
+ ResourceMark rm;
+ // I'll need a few machine-specific GotoNodes. Make an Ideal GotoNode,
+ // then Match it into a machine-specific Node. Then clone the machine
+ // Node on demand.
+ Node *x = new (C, 1) GotoNode(NULL);
+ x->init_req(0, x);
+ _goto = m.match_tree(x);
+ assert(_goto != NULL, "");
+ _goto->set_req(0,_goto);
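+  // The prototype Goto keeps a self-referential control input as a
+  // placeholder; each clone gets its real control edge when it is inserted
+  // (see build_cfg and insert_goto_at below).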
+
+ // Build the CFG in Reverse Post Order
+ _num_blocks = build_cfg();
+ _broot = _bbs[_root->_idx];
+}
+
+//------------------------------build_cfg--------------------------------------
+// Build a proper looking CFG. Make every block begin with either a StartNode
+// or a RegionNode. Make every block end with either a Goto, If or Return.
+// The RootNode both starts and ends its own block. Do this with a recursive
+// backwards walk over the control edges.
+uint PhaseCFG::build_cfg() {
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+
+ // Allocate stack with enough space to avoid frequent realloc
+ Node_Stack nstack(a, C->unique() >> 1);
+ nstack.push(_root, 0);
+ uint sum = 0; // Counter for blocks
+
+ while (nstack.is_nonempty()) {
+ // node and in's index from stack's top
+ // 'np' is _root (see above) or RegionNode, StartNode: we push on stack
+ // only nodes which point to the start of basic block (see below).
+ Node *np = nstack.node();
+ // idx > 0, except for the first node (_root) pushed on stack
+ // at the beginning when idx == 0.
+ // We will use the condition (idx == 0) later to end the build.
+ uint idx = nstack.index();
+ Node *proj = np->in(idx);
+ const Node *x = proj->is_block_proj();
+ // Does the block end with a proper block-ending Node? One of Return,
+ // If or Goto? (This check should be done for visited nodes also).
+ if (x == NULL) { // Does not end right...
+ Node *g = _goto->clone(); // Force it to end in a Goto
+ g->set_req(0, proj);
+ np->set_req(idx, g);
+ x = proj = g;
+ }
+ if (!visited.test_set(x->_idx)) { // Visit this block once
+ // Skip any control-pinned middle'in stuff
+ Node *p = proj;
+ do {
+ proj = p; // Update pointer to last Control
+ p = p->in(0); // Move control forward
+ } while( !p->is_block_proj() &&
+ !p->is_block_start() );
+ // Make the block begin with one of Region or StartNode.
+ if( !p->is_block_start() ) {
+ RegionNode *r = new (C, 2) RegionNode( 2 );
+ r->init_req(1, p); // Insert RegionNode in the way
+ proj->set_req(0, r); // Insert RegionNode in the way
+ p = r;
+ }
+ // 'p' now points to the start of this basic block
+
+ // Put self in array of basic blocks
+ Block *bb = new (_bbs._arena) Block(_bbs._arena,p);
+ _bbs.map(p->_idx,bb);
+ _bbs.map(x->_idx,bb);
+ if( x != p ) // Only for root is x == p
+ bb->_nodes.push((Node*)x);
+
+ // Now handle predecessors
+ ++sum; // Count 1 for self block
+ uint cnt = bb->num_preds();
+ for (int i = (cnt - 1); i > 0; i-- ) { // For all predecessors
+ Node *prevproj = p->in(i); // Get prior input
+ assert( !prevproj->is_Con(), "dead input not removed" );
+ // Check to see if p->in(i) is a "control-dependent" CFG edge -
+ // i.e., it splits at the source (via an IF or SWITCH) and merges
+ // at the destination (via a many-input Region).
+ // This breaks critical edges. The RegionNode to start the block
+ // will be added when <p,i> is pulled off the node stack
+ if ( cnt > 2 ) { // Merging many things?
+ assert( prevproj== bb->pred(i),"");
+ if(prevproj->is_block_proj() != prevproj) { // Control-dependent edge?
+ // Force a block on the control-dependent edge
+ Node *g = _goto->clone(); // Force it to end in a Goto
+ g->set_req(0,prevproj);
+ p->set_req(i,g);
+ }
+ }
+ nstack.push(p, i); // 'p' is RegionNode or StartNode
+ }
+ } else { // Post-processing visited nodes
+ nstack.pop(); // remove node from stack
+      // Check if it is the first node pushed on the stack at the beginning.
+ if (idx == 0) break; // end of the build
+ // Find predecessor basic block
+ Block *pb = _bbs[x->_idx];
+ // Insert into nodes array, if not already there
+ if( !_bbs.lookup(proj->_idx) ) {
+ assert( x != proj, "" );
+ // Map basic block of projection
+ _bbs.map(proj->_idx,pb);
+ pb->_nodes.push(proj);
+ }
+ // Insert self as a child of my predecessor block
+ pb->_succs.map(pb->_num_succs++, _bbs[np->_idx]);
+ assert( pb->_nodes[ pb->_nodes.size() - pb->_num_succs ]->is_block_proj(),
+ "too many control users, not a CFG?" );
+ }
+ }
+ // Return number of basic blocks for all children and self
+ return sum;
+}
+
+//------------------------------insert_goto_at---------------------------------
+// Inserts a goto & corresponding basic block between
+// block[block_no] and its succ_no'th successor block
+void PhaseCFG::insert_goto_at(uint block_no, uint succ_no) {
+ // get block with block_no
+ assert(block_no < _num_blocks, "illegal block number");
+ Block* in = _blocks[block_no];
+ // get successor block succ_no
+ assert(succ_no < in->_num_succs, "illegal successor number");
+ Block* out = in->_succs[succ_no];
+ // get ProjNode corresponding to the succ_no'th successor of the in block
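+  // (the successor projections occupy the last _num_succs slots of the node
+  //  list, in the same order as the _succs array)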
+ ProjNode* proj = in->_nodes[in->_nodes.size() - in->_num_succs + succ_no]->as_Proj();
+ // create region for basic block
+ RegionNode* region = new (C, 2) RegionNode(2);
+ region->init_req(1, proj);
+ // setup corresponding basic block
+ Block* block = new (_bbs._arena) Block(_bbs._arena, region);
+ _bbs.map(region->_idx, block);
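+  // tell the register allocator that this late-created node has no register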
+ C->regalloc()->set_bad(region->_idx);
+ // add a goto node
+ Node* gto = _goto->clone(); // get a new goto node
+ gto->set_req(0, region);
+ // add it to the basic block
+ block->_nodes.push(gto);
+ _bbs.map(gto->_idx, block);
+ C->regalloc()->set_bad(gto->_idx);
+ // hook up successor block
+ block->_succs.map(block->_num_succs++, out);
+ // remap successor's predecessors if necessary
+ for (uint i = 1; i < out->num_preds(); i++) {
+ if (out->pred(i) == proj) out->head()->set_req(i, gto);
+ }
+ // remap predecessor's successor to new block
+ in->_succs.map(succ_no, block);
+ // add new basic block to basic block list
+ _blocks.insert(block_no + 1, block);
+ _num_blocks++;
+}
+
+//------------------------------no_flip_branch---------------------------------
+// Does this block end in a multiway branch that cannot have the default case
+// flipped for another case?
+static bool no_flip_branch( Block *b ) {
+ int branch_idx = b->_nodes.size() - b->_num_succs-1;
+ if( branch_idx < 1 ) return false;
+ Node *bra = b->_nodes[branch_idx];
+ if( bra->is_Catch() ) return true;
+ if( bra->is_Mach() ) {
+ if( bra->is_MachNullCheck() ) return true;
+ int iop = bra->as_Mach()->ideal_Opcode();
+ if( iop == Op_FastLock || iop == Op_FastUnlock )
+ return true;
+ }
+ return false;
+}
+
+//------------------------------convert_NeverBranch_to_Goto--------------------
+// Check for NeverBranch at block end. This needs to become a GOTO to the
+// true target. NeverBranch nodes are treated as a conditional branch that always
+// goes the same direction for most of the optimizer and are used to give a
+// fake exit path to infinite loops. At this late stage they need to turn
+// into Goto's so that when you enter the infinite loop you indeed hang.
+void PhaseCFG::convert_NeverBranch_to_Goto(Block *b) {
+ // Find true target
+ int end_idx = b->end_idx();
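+  // The _con of the projection just past the NeverBranch selects which of the
+  // two successors is the always-taken (true) target.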
+ int idx = b->_nodes[end_idx+1]->as_Proj()->_con;
+ Block *succ = b->_succs[idx];
+ Node* gto = _goto->clone(); // get a new goto node
+ gto->set_req(0, b->head());
+ Node *bp = b->_nodes[end_idx];
+ b->_nodes.map(end_idx,gto); // Slam over NeverBranch
+ _bbs.map(gto->_idx, b);
+ C->regalloc()->set_bad(gto->_idx);
+ b->_nodes.pop(); // Yank projections
+ b->_nodes.pop(); // Yank projections
+ b->_succs.map(0,succ); // Map only successor
+ b->_num_succs = 1;
+ // remap successor's predecessors if necessary
+ uint j;
+ for( j = 1; j < succ->num_preds(); j++)
+ if( succ->pred(j)->in(0) == bp )
+ succ->head()->set_req(j, gto);
+ // Kill alternate exit path
+ Block *dead = b->_succs[1-idx];
+ for( j = 1; j < dead->num_preds(); j++)
+ if( dead->pred(j)->in(0) == bp )
+ break;
+ // Scan through block, yanking dead path from
+ // all regions and phis.
+ dead->head()->del_req(j);
+ for( int k = 1; dead->_nodes[k]->is_Phi(); k++ )
+ dead->_nodes[k]->del_req(j);
+}
+
+//------------------------------MoveToNext-------------------------------------
+// Helper function to move block bx to the slot following b_index. Return
+// true if the move is successful, otherwise false
+bool PhaseCFG::MoveToNext(Block* bx, uint b_index) {
+ if (bx == NULL) return false;
+
+ // Return false if bx is already scheduled.
+ uint bx_index = bx->_pre_order;
+ if ((bx_index <= b_index) && (_blocks[bx_index] == bx)) {
+ return false;
+ }
+
+ // Find the current index of block bx on the block list
+ bx_index = b_index + 1;
+ while( bx_index < _num_blocks && _blocks[bx_index] != bx ) bx_index++;
+ assert(_blocks[bx_index] == bx, "block not found");
+
+ // If the previous block conditionally falls into bx, return false,
+ // because moving bx will create an extra jump.
+ for(uint k = 1; k < bx->num_preds(); k++ ) {
+ Block* pred = _bbs[bx->pred(k)->_idx];
+ if (pred == _blocks[bx_index-1]) {
+ if (pred->_num_succs != 1) {
+ return false;
+ }
+ }
+ }
+
+ // Reinsert bx just past block 'b'
+ _blocks.remove(bx_index);
+ _blocks.insert(b_index + 1, bx);
+ return true;
+}
+
+//------------------------------MoveToEnd--------------------------------------
+// Move empty and uncommon blocks to the end.
+void PhaseCFG::MoveToEnd(Block *b, uint i) {
+ int e = b->is_Empty();
+ if (e != Block::not_empty) {
+ if (e == Block::empty_with_goto) {
+ // Remove the goto, but leave the block.
+ b->_nodes.pop();
+ }
+ // Mark this block as a connector block, which will cause it to be
+ // ignored in certain functions such as non_connector_successor().
+ b->set_connector();
+ }
+ // Move the empty block to the end, and don't recheck.
+ _blocks.remove(i);
+ _blocks.push(b);
+}
+
+//------------------------------RemoveEmpty------------------------------------
+// Remove empty basic blocks and useless branches.
+void PhaseCFG::RemoveEmpty() {
+ // Move uncommon blocks to the end
+ uint last = _num_blocks;
+ uint i;
+ assert( _blocks[0] == _broot, "" );
+ for( i = 1; i < last; i++ ) {
+ Block *b = _blocks[i];
+
+ // Check for NeverBranch at block end. This needs to become a GOTO to the
+    // true target. NeverBranch nodes are treated as a conditional branch that
+ // always goes the same direction for most of the optimizer and are used
+ // to give a fake exit path to infinite loops. At this late stage they
+ // need to turn into Goto's so that when you enter the infinite loop you
+ // indeed hang.
+ if( b->_nodes[b->end_idx()]->Opcode() == Op_NeverBranch )
+ convert_NeverBranch_to_Goto(b);
+
+ // Look for uncommon blocks and move to end.
+ if( b->is_uncommon(_bbs) ) {
+ MoveToEnd(b, i);
+ last--; // No longer check for being uncommon!
+ if( no_flip_branch(b) ) { // Fall-thru case must follow?
+ b = _blocks[i]; // Find the fall-thru block
+ MoveToEnd(b, i);
+ last--;
+ }
+ i--; // backup block counter post-increment
+ }
+ }
+
+ // Remove empty blocks
+ uint j1;
+ last = _num_blocks;
+ for( i=0; i < last; i++ ) {
+ Block *b = _blocks[i];
+ if (i > 0) {
+ if (b->is_Empty() != Block::not_empty) {
+ MoveToEnd(b, i);
+ last--;
+ i--;
+ }
+ }
+ } // End of for all blocks
+
+ // Fixup final control flow for the blocks. Remove jump-to-next
+  // block. If neither arm of an IF follows the conditional branch, we
+ // have to add a second jump after the conditional. We place the
+ // TRUE branch target in succs[0] for both GOTOs and IFs.
+ for( i=0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ b->_pre_order = i; // turn pre-order into block-index
+
+ // Connector blocks need no further processing.
+ if (b->is_connector()) {
+ assert((i+1) == _num_blocks || _blocks[i+1]->is_connector(),
+ "All connector blocks should sink to the end");
+ continue;
+ }
+ assert(b->is_Empty() != Block::completely_empty,
+ "Empty blocks should be connectors");
+
+ Block *bnext = (i < _num_blocks-1) ? _blocks[i+1] : NULL;
+ Block *bs0 = b->non_connector_successor(0);
+
+ // Check for multi-way branches where I cannot negate the test to
+ // exchange the true and false targets.
+ if( no_flip_branch( b ) ) {
+      // Find fall through case - it must fall into its target
+ int branch_idx = b->_nodes.size() - b->_num_succs;
+ for (uint j2 = 0; j2 < b->_num_succs; j2++) {
+ const ProjNode* p = b->_nodes[branch_idx + j2]->as_Proj();
+ if (p->_con == 0) {
+ // successor j2 is fall through case
+ if (b->non_connector_successor(j2) != bnext) {
+ // but it is not the next block => insert a goto
+ insert_goto_at(i, j2);
+ }
+ // Put taken branch in slot 0
+ if( j2 == 0 && b->_num_succs == 2) {
+ // Flip targets in succs map
+ Block *tbs0 = b->_succs[0];
+ Block *tbs1 = b->_succs[1];
+ b->_succs.map( 0, tbs1 );
+ b->_succs.map( 1, tbs0 );
+ }
+ break;
+ }
+ }
+ // Remove all CatchProjs
+ for (j1 = 0; j1 < b->_num_succs; j1++) b->_nodes.pop();
+
+ } else if (b->_num_succs == 1) {
+ // Block ends in a Goto?
+ if (bnext == bs0) {
+ // We fall into next block; remove the Goto
+ b->_nodes.pop();
+ }
+
+ } else if( b->_num_succs == 2 ) { // Block ends in a If?
+ // Get opcode of 1st projection (matches _succs[0])
+ // Note: Since this basic block has 2 exits, the last 2 nodes must
+ // be projections (in any order), the 3rd last node must be
+ // the IfNode (we have excluded other 2-way exits such as
+ // CatchNodes already).
+ MachNode *iff = b->_nodes[b->_nodes.size()-3]->as_Mach();
+ ProjNode *proj0 = b->_nodes[b->_nodes.size()-2]->as_Proj();
+ ProjNode *proj1 = b->_nodes[b->_nodes.size()-1]->as_Proj();
+
+ // Assert that proj0 and succs[0] match up. Similarly for proj1 and succs[1].
+ assert(proj0->raw_out(0) == b->_succs[0]->head(), "Mismatch successor 0");
+ assert(proj1->raw_out(0) == b->_succs[1]->head(), "Mismatch successor 1");
+
+ Block *bs1 = b->non_connector_successor(1);
+
+ // Check for neither successor block following the current
+ // block ending in a conditional. If so, move one of the
+ // successors after the current one, provided that the
+ // successor was previously unscheduled, but moveable
+ // (i.e., all paths to it involve a branch).
+ if( bnext != bs0 && bnext != bs1 ) {
+
+ // Choose the more common successor based on the probability
+ // of the conditional branch.
+ Block *bx = bs0;
+ Block *by = bs1;
+
+ // _prob is the probability of taking the true path. Make
+ // p the probability of taking successor #1.
+ float p = iff->as_MachIf()->_prob;
+ if( proj0->Opcode() == Op_IfTrue ) {
+ p = 1.0 - p;
+ }
+
+ // Prefer successor #1 if p > 0.5
+ if (p > PROB_FAIR) {
+ bx = bs1;
+ by = bs0;
+ }
+
+ // Attempt the more common successor first
+ if (MoveToNext(bx, i)) {
+ bnext = bx;
+ } else if (MoveToNext(by, i)) {
+ bnext = by;
+ }
+ }
+
+ // Check for conditional branching the wrong way. Negate
+ // conditional, if needed, so it falls into the following block
+ // and branches to the not-following block.
+
+ // Check for the next block being in succs[0]. We are going to branch
+ // to succs[0], so we want the fall-thru case as the next block in
+ // succs[1].
+ if (bnext == bs0) {
+ // Fall-thru case in succs[0], so flip targets in succs map
+ Block *tbs0 = b->_succs[0];
+ Block *tbs1 = b->_succs[1];
+ b->_succs.map( 0, tbs1 );
+ b->_succs.map( 1, tbs0 );
+ // Flip projection for each target
+ { ProjNode *tmp = proj0; proj0 = proj1; proj1 = tmp; }
+
+ } else if( bnext == bs1 ) { // Fall-thru is already in succs[1]
+
+ } else { // Else need a double-branch
+
+ // The existing conditional branch need not change.
+        // Add an unconditional branch to the false target.
+ // Alas, it must appear in its own block and adding a
+ // block this late in the game is complicated. Sigh.
+ insert_goto_at(i, 1);
+ }
+
+ // Make sure we TRUE branch to the target
+ if( proj0->Opcode() == Op_IfFalse )
+ iff->negate();
+
+ b->_nodes.pop(); // Remove IfFalse & IfTrue projections
+ b->_nodes.pop();
+
+ } else {
+ // Multi-exit block, e.g. a switch statement
+ // But we don't need to do anything here
+ }
+
+ } // End of for all blocks
+
+}
+
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseCFG::_dump_cfg( const Node *end, VectorSet &visited ) const {
+ const Node *x = end->is_block_proj();
+ assert( x, "not a CFG" );
+
+ // Do not visit this block again
+ if( visited.test_set(x->_idx) ) return;
+
+ // Skip through this block
+ const Node *p = x;
+ do {
+ p = p->in(0); // Move control forward
+ assert( !p->is_block_proj() || p->is_Root(), "not a CFG" );
+ } while( !p->is_block_start() );
+
+ // Recursively visit
+ for( uint i=1; i<p->req(); i++ )
+ _dump_cfg(p->in(i),visited);
+
+ // Dump the block
+ _bbs[p->_idx]->dump(&_bbs);
+}
+
+void PhaseCFG::dump( ) const {
+ tty->print("\n--- CFG --- %d BBs\n",_num_blocks);
+ if( _blocks.size() ) { // Did we do basic-block layout?
+ for( uint i=0; i<_num_blocks; i++ )
+ _blocks[i]->dump(&_bbs);
+ } else { // Else do it with a DFS
+ VectorSet visited(_bbs._arena);
+ _dump_cfg(_root,visited);
+ }
+}
+
+void PhaseCFG::dump_headers() {
+ for( uint i = 0; i < _num_blocks; i++ ) {
+ if( _blocks[i] == NULL ) continue;
+ _blocks[i]->dump_head(&_bbs);
+ }
+}
+
+void PhaseCFG::verify( ) const {
+ // Verify sane CFG
+ for( uint i = 0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ uint cnt = b->_nodes.size();
+ uint j;
+ for( j = 0; j < cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ assert( _bbs[n->_idx] == b, "" );
+ if( j >= 1 && n->is_Mach() &&
+ n->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+ assert( j == 1 || b->_nodes[j-1]->is_Phi(),
+ "CreateEx must be first instruction in block" );
+ }
+ for( uint k = 0; k < n->req(); k++ ) {
+ Node *use = n->in(k);
+ if( use && use != n ) {
+ assert( _bbs[use->_idx] || use->is_Con(),
+ "must have block; constants for debug info ok" );
+ }
+ }
+ }
+
+ j = b->end_idx();
+ Node *bp = (Node*)b->_nodes[b->_nodes.size()-1]->is_block_proj();
+ assert( bp, "last instruction must be a block proj" );
+ assert( bp == b->_nodes[j], "wrong number of successors for this block" );
+ if( bp->is_Catch() ) {
+ while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
+ assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
+ }
+ else if( bp->is_Mach() && bp->as_Mach()->ideal_Opcode() == Op_If ) {
+ assert( b->_num_succs == 2, "Conditional branch must have two targets");
+ }
+ }
+}
+#endif
+
+//=============================================================================
+//------------------------------UnionFind--------------------------------------
+UnionFind::UnionFind( uint max ) : _cnt(max), _max(max), _indices(NEW_RESOURCE_ARRAY(uint,max)) {
+ Copy::zero_to_bytes( _indices, sizeof(uint)*max );
+}
+
+void UnionFind::extend( uint from_idx, uint to_idx ) {
+ _nesting.check();
+ if( from_idx >= _max ) {
+ uint size = 16;
+ while( size <= from_idx ) size <<=1;
+ _indices = REALLOC_RESOURCE_ARRAY( uint, _indices, _max, size );
+ _max = size;
+ }
+ while( _cnt <= from_idx ) _indices[_cnt++] = 0;
+ _indices[from_idx] = to_idx;
+}
+
+void UnionFind::reset( uint max ) {
+ assert( max <= max_uint, "Must fit within uint" );
+ // Force the Union-Find mapping to be at least this large
+ extend(max,0);
+ // Initialize to be the ID mapping.
+ for( uint i=0; i<_max; i++ ) map(i,i);
+}
+
+//------------------------------Find_compress----------------------------------
+// Straight out of Tarjan's union-find algorithm
+uint UnionFind::Find_compress( uint idx ) {
+ uint cur = idx;
+ uint next = lookup(cur);
+ while( next != cur ) { // Scan chain of equivalences
+ assert( next < cur, "always union smaller" );
+ cur = next; // until find a fixed-point
+ next = lookup(cur);
+ }
+ // Core of union-find algorithm: update chain of
+ // equivalences to be equal to the root.
+ while( idx != next ) {
+ uint tmp = lookup(idx);
+ map(idx, next);
+ idx = tmp;
+ }
+ return idx;
+}
+
+//------------------------------Find_const-------------------------------------
+// Like Find above, but with no path compression, so it has bad asymptotic behavior
+uint UnionFind::Find_const( uint idx ) const {
+ if( idx == 0 ) return idx; // Ignore the zero idx
+ // Off the end? This can happen during debugging dumps
+ // when data structures have not finished being updated.
+ if( idx >= _max ) return idx;
+ uint next = lookup(idx);
+ while( next != idx ) { // Scan chain of equivalences
+ assert( next < idx, "always union smaller" );
+ idx = next; // until find a fixed-point
+ next = lookup(idx);
+ }
+ return next;
+}
+
+//------------------------------Union------------------------------------------
+// union 2 sets together.
+void UnionFind::Union( uint idx1, uint idx2 ) {
+ uint src = Find(idx1);
+ uint dst = Find(idx2);
+ assert( src, "" );
+ assert( dst, "" );
+ assert( src < _max, "oob" );
+ assert( dst < _max, "oob" );
+ assert( src < dst, "always union smaller" );
+ map(dst,src);
+}
diff --git a/src/share/vm/opto/block.hpp b/src/share/vm/opto/block.hpp
new file mode 100644
index 000000000..8708a4ded
--- /dev/null
+++ b/src/share/vm/opto/block.hpp
@@ -0,0 +1,510 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+class Block;
+class CFGLoop;
+class MachCallNode;
+class Matcher;
+class RootNode;
+class VectorSet;
+struct Tarjan;
+
+//------------------------------Block_Array------------------------------------
+// Map dense integer indices to Blocks. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Block*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+class Block_Array : public ResourceObj {
+ uint _size; // allocated size, as opposed to formal limit
+ debug_only(uint _limit;) // limit to formal domain
+protected:
+ Block **_blocks;
+ void grow( uint i ); // Grow array node to fit
+
+public:
+ Arena *_arena; // Arena to allocate in
+
+ Block_Array(Arena *a) : _arena(a), _size(OptoBlockListSize) {
+ debug_only(_limit=0);
+ _blocks = NEW_ARENA_ARRAY( a, Block *, OptoBlockListSize );
+ for( int i = 0; i < OptoBlockListSize; i++ ) {
+ _blocks[i] = NULL;
+ }
+ }
+ Block *lookup( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<Max()) ? _blocks[i] : (Block*)NULL; }
+ Block *operator[] ( uint i ) const // Lookup, or assert for not mapped
+ { assert( i < Max(), "oob" ); return _blocks[i]; }
+ // Extend the mapping: index i maps to Block *n.
+ void map( uint i, Block *n ) { if( i>=Max() ) grow(i); _blocks[i] = n; }
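+  // In debug builds Max() is the formal limit (_limit); in product builds it
+  // is the allocated size (_size).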
+ uint Max() const { debug_only(return _limit); return _size; }
+};
+
+
+class Block_List : public Block_Array {
+public:
+ uint _cnt;
+ Block_List() : Block_Array(Thread::current()->resource_area()), _cnt(0) {}
+ void push( Block *b ) { map(_cnt++,b); }
+ Block *pop() { return _blocks[--_cnt]; }
+ Block *rpop() { Block *b = _blocks[0]; _blocks[0]=_blocks[--_cnt]; return b;}
+ void remove( uint i );
+ void insert( uint i, Block *n );
+ uint size() const { return _cnt; }
+ void reset() { _cnt = 0; }
+};
+
+
+class CFGElement : public ResourceObj {
+ public:
+ float _freq; // Execution frequency (estimate)
+
+ CFGElement() : _freq(0.0f) {}
+ virtual bool is_block() { return false; }
+ virtual bool is_loop() { return false; }
+ Block* as_Block() { assert(is_block(), "must be block"); return (Block*)this; }
+ CFGLoop* as_CFGLoop() { assert(is_loop(), "must be loop"); return (CFGLoop*)this; }
+};
+
+//------------------------------Block------------------------------------------
+// This class defines a Basic Block.
+// Basic blocks are used during the output routines, and are not used during
+// any optimization pass. They are created late in the game.
+class Block : public CFGElement {
+ public:
+ // Nodes in this block, in order
+ Node_List _nodes;
+
+ // Basic blocks have a Node which defines Control for all Nodes pinned in
+ // this block. This Node is a RegionNode. Exception-causing Nodes
+ // (division, subroutines) and Phi functions are always pinned. Later,
+ // every Node will get pinned to some block.
+ Node *head() const { return _nodes[0]; }
+
+ // CAUTION: num_preds() is ONE based, so that predecessor numbers match
+ // input edges to Regions and Phis.
+ uint num_preds() const { return head()->req(); }
+ Node *pred(uint i) const { return head()->in(i); }
+
+ // Array of successor blocks, same size as projs array
+ Block_Array _succs;
+
+ // Basic blocks have some number of Nodes which split control to all
+ // following blocks. These Nodes are always Projections. The field in
+ // the Projection and the block-ending Node determine which Block follows.
+ uint _num_succs;
+
+ // Basic blocks also carry all sorts of good old fashioned DFS information
+ // used to find loops, loop nesting depth, dominators, etc.
+ uint _pre_order; // Pre-order DFS number
+
+ // Dominator tree
+ uint _dom_depth; // Depth in dominator tree for fast LCA
+ Block* _idom; // Immediate dominator block
+
+ CFGLoop *_loop; // Loop to which this block belongs
+ uint _rpo; // Number in reverse post order walk
+
+ virtual bool is_block() { return true; }
+ float succ_prob(uint i); // return probability of i'th successor
+
+ Block* dom_lca(Block* that); // Compute LCA in dominator tree.
+#ifdef ASSERT
+ bool dominates(Block* that) {
+ int dom_diff = this->_dom_depth - that->_dom_depth;
+ if (dom_diff > 0) return false;
+ for (; dom_diff < 0; dom_diff++) that = that->_idom;
+ return this == that;
+ }
+#endif
+
+ // Report the alignment required by this block. Must be a power of 2.
+ // The previous block will insert nops to get this alignment.
+ uint code_alignment();
+
+ // BLOCK_FREQUENCY is a sentinel to mark uses of constant block frequencies.
+  // It also rescales such frequencies for the current FreqCountInvocations
+  // setting, relative to its old default of 1500.
+#define BLOCK_FREQUENCY(f) ((f * (float) 1500) / FreqCountInvocations)
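+  // e.g. with FreqCountInvocations at the old default of 1500,
+  // BLOCK_FREQUENCY(f) is just f; halving FreqCountInvocations doubles it.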
+
+ // Register Pressure (estimate) for Splitting heuristic
+ uint _reg_pressure;
+ uint _ihrp_index;
+ uint _freg_pressure;
+ uint _fhrp_index;
+
+ // Mark and visited bits for an LCA calculation in insert_anti_dependences.
+ // Since they hold unique node indexes, they do not need reinitialization.
+ node_idx_t _raise_LCA_mark;
+ void set_raise_LCA_mark(node_idx_t x) { _raise_LCA_mark = x; }
+ node_idx_t raise_LCA_mark() const { return _raise_LCA_mark; }
+ node_idx_t _raise_LCA_visited;
+ void set_raise_LCA_visited(node_idx_t x) { _raise_LCA_visited = x; }
+ node_idx_t raise_LCA_visited() const { return _raise_LCA_visited; }
+
+ // Estimated size in bytes of first instructions in a loop.
+ uint _first_inst_size;
+ uint first_inst_size() const { return _first_inst_size; }
+ void set_first_inst_size(uint s) { _first_inst_size = s; }
+
+ // Compute the size of first instructions in this block.
+ uint compute_first_inst_size(uint& sum_size, uint inst_cnt, PhaseRegAlloc* ra);
+
+ // Compute alignment padding if the block needs it.
+  // Align a loop if the loop's padding is less than or equal to the padding
+  // limit, or if the size of the first instructions in the loop exceeds the padding.
+ uint alignment_padding(int current_offset) {
+ int block_alignment = code_alignment();
+ int max_pad = block_alignment-relocInfo::addr_unit();
+ if( max_pad > 0 ) {
+ assert(is_power_of_2(max_pad+relocInfo::addr_unit()), "");
+ int current_alignment = current_offset & max_pad;
+ if( current_alignment != 0 ) {
+ uint padding = (block_alignment-current_alignment) & max_pad;
+ if( !head()->is_Loop() ||
+ padding <= (uint)MaxLoopPad ||
+ first_inst_size() > padding ) {
+ return padding;
+ }
+ }
+ }
+ return 0;
+ }
+
+ // Connector blocks. Connector blocks are basic blocks devoid of
+ // instructions, but may have relevant non-instruction Nodes, such as
+ // Phis or MergeMems. Such blocks are discovered and marked during the
+ // RemoveEmpty phase, and elided during Output.
+ bool _connector;
+ void set_connector() { _connector = true; }
+ bool is_connector() const { return _connector; };
+
+ // Create a new Block with given head Node.
+ // Creates the (empty) predecessor arrays.
+ Block( Arena *a, Node *headnode )
+ : CFGElement(),
+ _nodes(a),
+ _succs(a),
+ _num_succs(0),
+ _pre_order(0),
+ _idom(0),
+ _loop(NULL),
+ _reg_pressure(0),
+ _ihrp_index(1),
+ _freg_pressure(0),
+ _fhrp_index(1),
+ _raise_LCA_mark(0),
+ _raise_LCA_visited(0),
+ _first_inst_size(999999),
+ _connector(false) {
+ _nodes.push(headnode);
+ }
+
+ // Index of 'end' Node
+ uint end_idx() const {
+ // %%%%% add a proj after every goto
+ // so (last->is_block_proj() != last) always, then simplify this code
+ // This will not give correct end_idx for block 0 when it only contains root.
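+    // The end is either the last node itself (when that node is its own block
+    // projection, e.g. a Goto or Return) or the node just before the trailing
+    // _num_succs projections.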
+ int last_idx = _nodes.size() - 1;
+ Node *last = _nodes[last_idx];
+ assert(last->is_block_proj() == last || last->is_block_proj() == _nodes[last_idx - _num_succs], "");
+ return (last->is_block_proj() == last) ? last_idx : (last_idx - _num_succs);
+ }
+
+ // Basic blocks have a Node which ends them. This Node determines which
+ // basic block follows this one in the program flow. This Node is either an
+ // IfNode, a GotoNode, a JmpNode, or a ReturnNode.
+ Node *end() const { return _nodes[end_idx()]; }
+
+ // Add an instruction to an existing block. It must go after the head
+ // instruction and before the end instruction.
+ void add_inst( Node *n ) { _nodes.insert(end_idx(),n); }
+ // Find node in block
+ uint find_node( const Node *n ) const;
+ // Find and remove n from block list
+ void find_remove( const Node *n );
+
+ // Schedule a call next in the block
+ uint sched_call(Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call);
+
+ // Perform basic-block local scheduling
+ Node *select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot);
+ void set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs );
+ void needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs);
+ bool schedule_local(PhaseCFG *cfg, Matcher &m, int *ready_cnt, VectorSet &next_call);
+  // Cleanup if any code lands between a Call and its Catch
+ void call_catch_cleanup(Block_Array &bbs);
+ // Detect implicit-null-check opportunities. Basically, find NULL checks
+ // with suitable memory ops nearby. Use the memory op to do the NULL check.
+ // I can generate a memory op if there is not one nearby.
+ void implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons);
+
+ // Return the empty status of a block
+ enum { not_empty, empty_with_goto, completely_empty };
+ int is_Empty() const;
+
+ // Forward through connectors
+ Block* non_connector() {
+ Block* s = this;
+ while (s->is_connector()) {
+ s = s->_succs[0];
+ }
+ return s;
+ }
+
+ // Successor block, after forwarding through connectors
+ Block* non_connector_successor(int i) const {
+ return _succs[i]->non_connector();
+ }
+
+ // Examine block's code shape to predict if it is not commonly executed.
+ bool has_uncommon_code() const;
+
+ // Use frequency calculations and code shape to predict if the block
+ // is uncommon.
+ bool is_uncommon( Block_Array &bbs ) const;
+
+#ifndef PRODUCT
+ // Debugging print of basic block
+ void dump_bidx(const Block* orig) const;
+ void dump_pred(const Block_Array *bbs, Block* orig) const;
+ void dump_head( const Block_Array *bbs ) const;
+ void dump( ) const;
+ void dump( const Block_Array *bbs ) const;
+#endif
+};
+
+
+//------------------------------PhaseCFG---------------------------------------
+// Build an array of Basic Block pointers, one per Node.
+class PhaseCFG : public Phase {
+ private:
+ // Build a proper looking cfg. Return count of basic blocks
+ uint build_cfg();
+
+ // Perform DFS search.
+ // Setup 'vertex' as DFS to vertex mapping.
+ // Setup 'semi' as vertex to DFS mapping.
+ // Set 'parent' to DFS parent.
+ uint DFS( Tarjan *tarjan );
+
+ // Helper function to insert a node into a block
+ void schedule_node_into_block( Node *n, Block *b );
+
+ // Set the basic block for pinned Nodes
+ void schedule_pinned_nodes( VectorSet &visited );
+
+ // I'll need a few machine-specific GotoNodes. Clone from this one.
+ MachNode *_goto;
+ void insert_goto_at(uint block_no, uint succ_no);
+
+ Block* insert_anti_dependences(Block* LCA, Node* load, bool verify = false);
+ void verify_anti_dependences(Block* LCA, Node* load) {
+ assert(LCA == _bbs[load->_idx], "should already be scheduled");
+ insert_anti_dependences(LCA, load, true);
+ }
+
+ public:
+ PhaseCFG( Arena *a, RootNode *r, Matcher &m );
+
+ uint _num_blocks; // Count of basic blocks
+ Block_List _blocks; // List of basic blocks
+ RootNode *_root; // Root of whole program
+ Block_Array _bbs; // Map Nodes to owning Basic Block
+ Block *_broot; // Basic block of root
+ uint _rpo_ctr;
+ CFGLoop* _root_loop;
+
+ // Per node latency estimation, valid only during GCM
+ GrowableArray<uint> _node_latency;
+
+#ifndef PRODUCT
+ bool _trace_opto_pipelining; // tracing flag
+#endif
+
+ // Build dominators
+ void Dominators();
+
+ // Estimate block frequencies based on IfNode probabilities
+ void Estimate_Block_Frequency();
+
+ // Global Code Motion. See Click's PLDI95 paper. Place Nodes in specific
+ // basic blocks; i.e. _bbs now maps _idx for all Nodes to some Block.
+ void GlobalCodeMotion( Matcher &m, uint unique, Node_List &proj_list );
+
+ // Compute the (backwards) latency of a node from the uses
+ void latency_from_uses(Node *n);
+
+ // Compute the (backwards) latency of a node from a single use
+ int latency_from_use(Node *n, const Node *def, Node *use);
+
+ // Compute the (backwards) latency of a node from the uses of this instruction
+ void partial_latency_of_defs(Node *n);
+
+ // Schedule Nodes early in their basic blocks.
+ bool schedule_early(VectorSet &visited, Node_List &roots);
+
+ // For each node, find the latest block it can be scheduled into
+ // and then select the cheapest block between the latest and earliest
+ // block to place the node.
+ void schedule_late(VectorSet &visited, Node_List &stack);
+
+ // Pick a block between early and late that is a cheaper alternative
+ // to late. Helper for schedule_late.
+ Block* hoist_to_cheaper_block(Block* LCA, Block* early, Node* self);
+
+ // Compute the instruction global latency with a backwards walk
+ void ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack);
+
+ // Remove empty basic blocks
+ void RemoveEmpty();
+ bool MoveToNext(Block* bx, uint b_index);
+ void MoveToEnd(Block* bx, uint b_index);
+
+ // Check for NeverBranch at block end. This needs to become a GOTO to the
+  // true target. NeverBranch nodes are treated as a conditional branch that always
+ // goes the same direction for most of the optimizer and are used to give a
+ // fake exit path to infinite loops. At this late stage they need to turn
+ // into Goto's so that when you enter the infinite loop you indeed hang.
+ void convert_NeverBranch_to_Goto(Block *b);
+
+ CFGLoop* create_loop_tree();
+
+ // Insert a node into a block, and update the _bbs
+ void insert( Block *b, uint idx, Node *n ) {
+ b->_nodes.insert( idx, n );
+ _bbs.map( n->_idx, b );
+ }
+
+#ifndef PRODUCT
+ bool trace_opto_pipelining() const { return _trace_opto_pipelining; }
+
+ // Debugging print of CFG
+ void dump( ) const; // CFG only
+ void _dump_cfg( const Node *end, VectorSet &visited ) const;
+ void verify() const;
+ void dump_headers();
+#else
+ bool trace_opto_pipelining() const { return false; }
+#endif
+};
+
+
+//------------------------------UnionFindInfo----------------------------------
+// Map Block indices to a block-index for a cfg-cover.
+// Array lookup in the optimized case.
+class UnionFind : public ResourceObj {
+ uint _cnt, _max;
+ uint* _indices;
+ ReallocMark _nesting; // assertion check for reallocations
+public:
+ UnionFind( uint max );
+ void reset( uint max ); // Reset to identity map for [0..max]
+
+ uint lookup( uint nidx ) const {
+ return _indices[nidx];
+ }
+ uint operator[] (uint nidx) const { return lookup(nidx); }
+
+ void map( uint from_idx, uint to_idx ) {
+ assert( from_idx < _cnt, "oob" );
+ _indices[from_idx] = to_idx;
+ }
+ void extend( uint from_idx, uint to_idx );
+
+ uint Size() const { return _cnt; }
+
+ uint Find( uint idx ) {
+ assert( idx < 65536, "Must fit into uint");
+ uint uf_idx = lookup(idx);
+ return (uf_idx == idx) ? uf_idx : Find_compress(idx);
+ }
+ uint Find_compress( uint idx );
+ uint Find_const( uint idx ) const;
+ void Union( uint idx1, uint idx2 );
+
+};
+
+//----------------------------BlockProbPair---------------------------
+// Ordered pair of a Block* target and the probability of the edge to it.
+class BlockProbPair VALUE_OBJ_CLASS_SPEC {
+protected:
+ Block* _target; // block target
+ float _prob; // probability of edge to block
+public:
+ BlockProbPair() : _target(NULL), _prob(0.0) {}
+ BlockProbPair(Block* b, float p) : _target(b), _prob(p) {}
+
+ Block* get_target() const { return _target; }
+ float get_prob() const { return _prob; }
+};
+
+//------------------------------CFGLoop-------------------------------------------
+class CFGLoop : public CFGElement {
+ int _id;
+ int _depth;
+  CFGLoop *_parent; // root of loop tree is the method level "pseudo" loop, its parent is null
+ CFGLoop *_sibling; // null terminated list
+ CFGLoop *_child; // first child, use child's sibling to visit all immediately nested loops
+ GrowableArray<CFGElement*> _members; // list of members of loop
+ GrowableArray<BlockProbPair> _exits; // list of successor blocks and their probabilities
+ float _exit_prob; // probability any loop exit is taken on a single loop iteration
+ void update_succ_freq(Block* b, float freq);
+
+ public:
+ CFGLoop(int id) :
+ CFGElement(),
+ _id(id),
+ _depth(0),
+ _parent(NULL),
+ _sibling(NULL),
+ _child(NULL),
+ _exit_prob(1.0f) {}
+ CFGLoop* parent() { return _parent; }
+ void push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk);
+ void add_member(CFGElement *s) { _members.push(s); }
+ void add_nested_loop(CFGLoop* cl);
+ Block* head() {
+ assert(_members.at(0)->is_block(), "head must be a block");
+ Block* hd = _members.at(0)->as_Block();
+ assert(hd->_loop == this, "just checking");
+ assert(hd->head()->is_Loop(), "must begin with loop head node");
+ return hd;
+ }
+ Block* backedge_block(); // Return the block on the backedge of the loop (else NULL)
+ void compute_loop_depth(int depth);
+ void compute_freq(); // compute frequency with loop assuming head freq 1.0f
+ void scale_freq(); // scale frequency by loop trip count (including outer loops)
+ bool in_loop_nest(Block* b);
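+  // with per-iteration exit probability p, the expected trip count is 1/p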
+ float trip_count() const { return 1.0f / _exit_prob; }
+ virtual bool is_loop() { return true; }
+ int id() { return _id; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+ void dump_tree() const;
+#endif
+};
diff --git a/src/share/vm/opto/buildOopMap.cpp b/src/share/vm/opto/buildOopMap.cpp
new file mode 100644
index 000000000..2116c404d
--- /dev/null
+++ b/src/share/vm/opto/buildOopMap.cpp
@@ -0,0 +1,623 @@
+/*
+ * Copyright 2002-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_buildOopMap.cpp.incl"
+
+// The functions in this file build OopMaps after all scheduling is done.
+//
+// OopMaps contain a list of all registers and stack-slots containing oops (so
+// they can be updated by GC). OopMaps also contain a list of derived-pointer
+// base-pointer pairs. When the base is moved, the derived pointer moves to
+// follow it. Finally, any registers holding callee-save values are also
+// recorded. These might contain oops, but only the caller knows.
+//
+// BuildOopMaps implements a simple forward reaching-defs solution. At each
+// GC point we'll have the reaching-def Nodes. If the reaching Nodes are
+// typed as pointers (no offset), then they are oops. Pointers+offsets are
+// derived pointers, and bases can be found from them. Finally, we'll also
+// track reaching callee-save values. Note that a copy of a callee-save value
+// "kills" it's source, so that only 1 copy of a callee-save value is alive at
+// a time.
+//
+// We run a simple bitvector liveness pass to help trim out dead oops. Due to
+// irreducible loops, we can have a reaching def of an oop that only reaches
+// along one path and no way to know if it's valid or not on the other path.
+// The bitvectors are quite dense and the liveness pass is fast.
+//
+// At GC points, we consult this information to build OopMaps. All reaching
+// defs typed as oops are added to the OopMap. Only 1 instance of a
+// callee-save register can be recorded. For derived pointers, we'll have to
+// find and record the register holding the base.
+//
+// The reaching-defs computation is a simple 1-pass worklist approach. I tried a clever
+// breadth-first approach but it was worse (showed O(n^2) in the
+// pick-next-block code).
+//
+// The relevant data is kept in a struct of arrays (it could just as well be
+// an array of structs, but the struct-of-arrays is generally a little more
+// efficient). The arrays are indexed by register number (including
+// stack-slots as registers) and so is bounded by 200 to 300 elements in
+// practice. One array will map to a reaching def Node (or NULL for
+// conflict/dead). The other array will map to a callee-saved register or
+// OptoReg::Bad for not-callee-saved.
+
+
+//------------------------------OopFlow----------------------------------------
+// Structure to pass around
+struct OopFlow : public ResourceObj {
+ short *_callees; // Array mapping register to callee-saved
+ Node **_defs; // array mapping register to reaching def
+ // or NULL if dead/conflict
+ // OopFlow structs, when not being actively modified, describe the _end_ of
+ // this block.
+ Block *_b; // Block for this struct
+ OopFlow *_next; // Next free OopFlow
+
+ OopFlow( short *callees, Node **defs ) : _callees(callees), _defs(defs),
+ _b(NULL), _next(NULL) { }
+
+ // Given reaching-defs for this block start, compute it for this block end
+ void compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash );
+
+ // Merge these two OopFlows into the 'this' pointer.
+ void merge( OopFlow *flow, int max_reg );
+
+ // Copy a 'flow' over an existing flow
+ void clone( OopFlow *flow, int max_size);
+
+ // Make a new OopFlow from scratch
+ static OopFlow *make( Arena *A, int max_size );
+
+ // Build an oopmap from the current flow info
+ OopMap *build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live );
+};
+
+//------------------------------compute_reach----------------------------------
+// Given reaching-defs for this block start, compute it for this block end
+void OopFlow::compute_reach( PhaseRegAlloc *regalloc, int max_reg, Dict *safehash ) {
+
+ for( uint i=0; i<_b->_nodes.size(); i++ ) {
+ Node *n = _b->_nodes[i];
+
+ if( n->jvms() ) { // Build an OopMap here?
+ JVMState *jvms = n->jvms();
+ // no map needed for leaf calls
+ if( n->is_MachSafePoint() && !n->is_MachCallLeaf() ) {
+ int *live = (int*) (*safehash)[n];
+ assert( live, "must find live" );
+ n->as_MachSafePoint()->set_oop_map( build_oop_map(n,max_reg,regalloc, live) );
+ }
+ }
+
+ // Assign new reaching def's.
+ // Note that I padded the _defs and _callees arrays so it's legal
+ // to index at _defs[OptoReg::Bad].
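+    // (get_reg_first/get_reg_second return OptoReg::Bad for nodes without a
+    //  register, so those stores land harmlessly in the padding slot.)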
+ OptoReg::Name first = regalloc->get_reg_first(n);
+ OptoReg::Name second = regalloc->get_reg_second(n);
+ _defs[first] = n;
+ _defs[second] = n;
+
+ // Pass callee-save info around copies
+ int idx = n->is_Copy();
+ if( idx ) { // Copies move callee-save info
+ OptoReg::Name old_first = regalloc->get_reg_first(n->in(idx));
+ OptoReg::Name old_second = regalloc->get_reg_second(n->in(idx));
+ int tmp_first = _callees[old_first];
+ int tmp_second = _callees[old_second];
+ _callees[old_first] = OptoReg::Bad; // callee-save is moved, dead in old location
+ _callees[old_second] = OptoReg::Bad;
+ _callees[first] = tmp_first;
+ _callees[second] = tmp_second;
+ } else if( n->is_Phi() ) { // Phis do not mod callee-saves
+ assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(1))], "" );
+ assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(1))], "" );
+ assert( _callees[first] == _callees[regalloc->get_reg_first(n->in(n->req()-1))], "" );
+ assert( _callees[second] == _callees[regalloc->get_reg_second(n->in(n->req()-1))], "" );
+ } else {
+ _callees[first] = OptoReg::Bad; // No longer holding a callee-save value
+ _callees[second] = OptoReg::Bad;
+
+ // Find base case for callee saves
+ if( n->is_Proj() && n->in(0)->is_Start() ) {
+ if( OptoReg::is_reg(first) &&
+ regalloc->_matcher.is_save_on_entry(first) )
+ _callees[first] = first;
+ if( OptoReg::is_reg(second) &&
+ regalloc->_matcher.is_save_on_entry(second) )
+ _callees[second] = second;
+ }
+ }
+ }
+}
+
+//------------------------------merge------------------------------------------
+// Merge the given flow into the 'this' flow
+void OopFlow::merge( OopFlow *flow, int max_reg ) {
+ assert( _b == NULL, "merging into a happy flow" );
+ assert( flow->_b, "this flow is still alive" );
+ assert( flow != this, "no self flow" );
+
+ // Do the merge. If there are any differences, drop to 'bottom' which
+ // is OptoReg::Bad or NULL depending.
+ for( int i=0; i<max_reg; i++ ) {
+ // Merge the callee-save's
+ if( _callees[i] != flow->_callees[i] )
+ _callees[i] = OptoReg::Bad;
+ // Merge the reaching defs
+ if( _defs[i] != flow->_defs[i] )
+ _defs[i] = NULL;
+ }
+
+}
+
+//------------------------------clone------------------------------------------
+void OopFlow::clone( OopFlow *flow, int max_size ) {
+ _b = flow->_b;
+ memcpy( _callees, flow->_callees, sizeof(short)*max_size);
+ memcpy( _defs , flow->_defs , sizeof(Node*)*max_size);
+}
+
+//------------------------------make-------------------------------------------
+OopFlow *OopFlow::make( Arena *A, int max_size ) {
+ short *callees = NEW_ARENA_ARRAY(A,short,max_size+1);
+ Node **defs = NEW_ARENA_ARRAY(A,Node*,max_size+1);
+ debug_only( memset(defs,0,(max_size+1)*sizeof(Node*)) );
+ OopFlow *flow = new (A) OopFlow(callees+1, defs+1);
+ assert( &flow->_callees[OptoReg::Bad] == callees, "Ok to index at OptoReg::Bad" );
+ assert( &flow->_defs [OptoReg::Bad] == defs , "Ok to index at OptoReg::Bad" );
+ return flow;
+}
+
+//------------------------------bit twiddlers----------------------------------
+static int get_live_bit( int *live, int reg ) {
+ return live[reg>>LogBitsPerInt] & (1<<(reg&(BitsPerInt-1))); }
+static void set_live_bit( int *live, int reg ) {
+ live[reg>>LogBitsPerInt] |= (1<<(reg&(BitsPerInt-1))); }
+static void clr_live_bit( int *live, int reg ) {
+ live[reg>>LogBitsPerInt] &= ~(1<<(reg&(BitsPerInt-1))); }
+
+//------------------------------build_oop_map----------------------------------
+// Build an oopmap from the current flow info
+OopMap *OopFlow::build_oop_map( Node *n, int max_reg, PhaseRegAlloc *regalloc, int* live ) {
+ int framesize = regalloc->_framesize;
+ int max_inarg_slot = OptoReg::reg2stack(regalloc->_matcher._new_SP);
+ debug_only( char *dup_check = NEW_RESOURCE_ARRAY(char,OptoReg::stack0());
+ memset(dup_check,0,OptoReg::stack0()) );
+
+ OopMap *omap = new OopMap( framesize, max_inarg_slot );
+ MachCallNode *mcall = n->is_MachCall() ? n->as_MachCall() : NULL;
+ JVMState* jvms = n->jvms();
+
+ // For all registers do...
+ for( int reg=0; reg<max_reg; reg++ ) {
+ if( get_live_bit(live,reg) == 0 )
+ continue; // Ignore if not live
+
+    // %%% C2 can use 2 OptoRegs when the physical register is only one 64-bit
+    // register; in that case we'll get a non-concrete register for the second
+    // half. We only need to tell the map the register once!
+ //
+ // However for the moment we disable this change and leave things as they
+ // were.
+
+ VMReg r = OptoReg::as_VMReg(OptoReg::Name(reg), framesize, max_inarg_slot);
+
+ if (false && r->is_reg() && !r->is_concrete()) {
+ continue;
+ }
+
+ // See if dead (no reaching def).
+ Node *def = _defs[reg]; // Get reaching def
+ assert( def, "since live better have reaching def" );
+
+ // Classify the reaching def as oop, derived, callee-save, dead, or other
+ const Type *t = def->bottom_type();
+ if( t->isa_oop_ptr() ) { // Oop or derived?
+ assert( !OptoReg::is_valid(_callees[reg]), "oop can't be callee save" );
+#ifdef _LP64
+ // 64-bit pointers record oop-ishness on 2 aligned adjacent registers.
+      // Make sure both are recorded from the same reaching def, but do not
+ // put both into the oopmap.
+ if( (reg&1) == 1 ) { // High half of oop-pair?
+ assert( _defs[reg-1] == _defs[reg], "both halves from same reaching def" );
+ continue; // Do not record high parts in oopmap
+ }
+#endif
+
+ // Check for a legal reg name in the oopMap and bailout if it is not.
+ if (!omap->legal_vm_reg_name(r)) {
+ regalloc->C->record_method_not_compilable("illegal oopMap register name");
+ continue;
+ }
+ if( t->is_ptr()->_offset == 0 ) { // Not derived?
+ if( mcall ) {
+ // Outgoing argument GC mask responsibility belongs to the callee,
+ // not the caller. Inspect the inputs to the call, to see if
+ // this live-range is one of them.
+ uint cnt = mcall->tf()->domain()->cnt();
+ uint j;
+ for( j = TypeFunc::Parms; j < cnt; j++)
+ if( mcall->in(j) == def )
+ break; // reaching def is an argument oop
+          if( j < cnt ) // arg oops don't go in GC map
+ continue; // Continue on to the next register
+ }
+ omap->set_oop(r);
+ } else { // Else it's derived.
+ // Find the base of the derived value.
+ uint i;
+ // Fast, common case, scan
+ for( i = jvms->oopoff(); i < n->req(); i+=2 )
+ if( n->in(i) == def ) break; // Common case
+ if( i == n->req() ) { // Missed, try a more generous scan
+ // Scan again, but this time peek through copies
+ for( i = jvms->oopoff(); i < n->req(); i+=2 ) {
+ Node *m = n->in(i); // Get initial derived value
+ while( 1 ) {
+ Node *d = def; // Get initial reaching def
+ while( 1 ) { // Follow copies of reaching def to end
+ if( m == d ) goto found; // breaks 3 loops
+ int idx = d->is_Copy();
+ if( !idx ) break;
+ d = d->in(idx); // Link through copy
+ }
+ int idx = m->is_Copy();
+ if( !idx ) break;
+ m = m->in(idx);
+ }
+ }
+ guarantee( 0, "must find derived/base pair" );
+ }
+ found: ;
+ Node *base = n->in(i+1); // Base is other half of pair
+ int breg = regalloc->get_reg_first(base);
+ VMReg b = OptoReg::as_VMReg(OptoReg::Name(breg), framesize, max_inarg_slot);
+
+ // I record liveness at safepoints BEFORE I make the inputs
+ // live. This is because argument oops are NOT live at a
+ // safepoint (or at least they cannot appear in the oopmap).
+ // Thus bases of base/derived pairs might not be in the
+ // liveness data but they need to appear in the oopmap.
+ if( get_live_bit(live,breg) == 0 ) {// Not live?
+ // Flag it, so next derived pointer won't re-insert into oopmap
+ set_live_bit(live,breg);
+ // Already missed our turn?
+ if( breg < reg ) {
+ if (b->is_stack() || b->is_concrete() || true ) {
+ omap->set_oop( b);
+ }
+ }
+ }
+ if (b->is_stack() || b->is_concrete() || true ) {
+ omap->set_derived_oop( r, b);
+ }
+ }
+
+ } else if( OptoReg::is_valid(_callees[reg])) { // callee-save?
+ // It's a callee-save value
+ assert( dup_check[_callees[reg]]==0, "trying to callee save same reg twice" );
+ debug_only( dup_check[_callees[reg]]=1; )
+ VMReg callee = OptoReg::as_VMReg(OptoReg::Name(_callees[reg]));
+ if ( callee->is_concrete() || true ) {
+ omap->set_callee_saved( r, callee);
+ }
+
+ } else {
+ // Other - some reaching non-oop value
+ omap->set_value( r);
+ }
+
+ }
+
+#ifdef ASSERT
+ /* Nice, Intel-only assert
+ int cnt_callee_saves=0;
+ int reg2 = 0;
+ while (OptoReg::is_reg(reg2)) {
+ if( dup_check[reg2] != 0) cnt_callee_saves++;
+ assert( cnt_callee_saves==3 || cnt_callee_saves==5, "missed some callee-save" );
+ reg2++;
+ }
+ */
+#endif
+
+ return omap;
+}
+
+//------------------------------do_liveness------------------------------------
+// Compute backwards liveness on registers
+static void do_liveness( PhaseRegAlloc *regalloc, PhaseCFG *cfg, Block_List *worklist, int max_reg_ints, Arena *A, Dict *safehash ) {
+ int *live = NEW_ARENA_ARRAY(A, int, (cfg->_num_blocks+1) * max_reg_ints);
+ int *tmp_live = &live[cfg->_num_blocks * max_reg_ints];
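+  // 'live' holds one row of max_reg_ints ints per block, indexed by the
+  // block's _pre_order number; the extra row at the end is the tmp_live
+  // scratch row used while walking a block.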
+ Node *root = cfg->C->root();
+ // On CISC platforms, get the node representing the stack pointer that regalloc
+ // used for spills
+ Node *fp = NodeSentinel;
+ if (UseCISCSpill && root->req() > 1) {
+ fp = root->in(1)->in(TypeFunc::FramePtr);
+ }
+ memset( live, 0, cfg->_num_blocks * (max_reg_ints<<LogBytesPerInt) );
+ // Push preds onto worklist
+ for( uint i=1; i<root->req(); i++ )
+ worklist->push(cfg->_bbs[root->in(i)->_idx]);
+
+ // ZKM.jar includes tiny infinite loops which are unreached from below.
+ // If we missed any blocks, we'll retry here after pushing all missed
+ // blocks on the worklist. Normally this outer loop never trips more
+ // than once.
+ while( 1 ) {
+
+ while( worklist->size() ) { // Standard worklist algorithm
+ Block *b = worklist->rpop();
+
+ // Copy first successor into my tmp_live space
+ int s0num = b->_succs[0]->_pre_order;
+ int *t = &live[s0num*max_reg_ints];
+ for( int i=0; i<max_reg_ints; i++ )
+ tmp_live[i] = t[i];
+
+ // OR in the remaining live registers
+ for( uint j=1; j<b->_num_succs; j++ ) {
+ uint sjnum = b->_succs[j]->_pre_order;
+ int *t = &live[sjnum*max_reg_ints];
+ for( int i=0; i<max_reg_ints; i++ )
+ tmp_live[i] |= t[i];
+ }
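+      // tmp_live is now the union of the successors' live-in sets, i.e. this
+      // block's live-out; the backward walk below refines it into live-in.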
+
+ // Now walk tmp_live up the block backwards, computing live
+ for( int k=b->_nodes.size()-1; k>=0; k-- ) {
+ Node *n = b->_nodes[k];
+ // KILL def'd bits
+ int first = regalloc->get_reg_first(n);
+ int second = regalloc->get_reg_second(n);
+ if( OptoReg::is_valid(first) ) clr_live_bit(tmp_live,first);
+ if( OptoReg::is_valid(second) ) clr_live_bit(tmp_live,second);
+
+ MachNode *m = n->is_Mach() ? n->as_Mach() : NULL;
+
+        // Check if m is potentially a CISC alternate instruction (i.e., possibly
+ // synthesized by RegAlloc from a conventional instruction and a
+ // spilled input)
+ bool is_cisc_alternate = false;
+ if (UseCISCSpill && m) {
+ is_cisc_alternate = m->is_cisc_alternate();
+ }
+
+ // GEN use'd bits
+ for( uint l=1; l<n->req(); l++ ) {
+ Node *def = n->in(l);
+ assert(def != 0, "input edge required");
+ int first = regalloc->get_reg_first(def);
+ int second = regalloc->get_reg_second(def);
+ if( OptoReg::is_valid(first) ) set_live_bit(tmp_live,first);
+ if( OptoReg::is_valid(second) ) set_live_bit(tmp_live,second);
+ // If we use the stack pointer in a cisc-alternative instruction,
+ // check for use as a memory operand. Then reconstruct the RegName
+ // for this stack location, and set the appropriate bit in the
+          // live vector (see bug 4987749).
+ if (is_cisc_alternate && def == fp) {
+ const TypePtr *adr_type = NULL;
+ intptr_t offset;
+ const Node* base = m->get_base_and_disp(offset, adr_type);
+ if (base == NodeSentinel) {
+ // Machnode has multiple memory inputs. We are unable to reason
+            // about these, but are presuming (with trepidation) that none of
+ // them are oops. This can be fixed by making get_base_and_disp()
+ // look at a specific input instead of all inputs.
+ assert(!def->bottom_type()->isa_oop_ptr(), "expecting non-oop mem input");
+ } else if (base != fp || offset == Type::OffsetBot) {
+ // Do nothing: the fp operand is either not from a memory use
+ // (base == NULL) OR the fp is used in a non-memory context
+ // (base is some other register) OR the offset is not constant,
+ // so it is not a stack slot.
+ } else {
+ assert(offset >= 0, "unexpected negative offset");
+ offset -= (offset % jintSize); // count the whole word
+ int stack_reg = regalloc->offset2reg(offset);
+ if (OptoReg::is_stack(stack_reg)) {
+ set_live_bit(tmp_live, stack_reg);
+ } else {
+ assert(false, "stack_reg not on stack?");
+ }
+ }
+ }
+ }
+
+ if( n->jvms() ) { // Record liveness at safepoint
+
+        // The placement of this stanza means inputs to calls are
+ // considered live at the callsite's OopMap. Argument oops are
+ // hence live, but NOT included in the oopmap. See cutout in
+ // build_oop_map. Debug oops are live (and in OopMap).
+ int *n_live = NEW_ARENA_ARRAY(A, int, max_reg_ints);
+ for( int l=0; l<max_reg_ints; l++ )
+ n_live[l] = tmp_live[l];
+ safehash->Insert(n,n_live);
+ }
+
+ }
+
+ // Now at block top, see if we have any changes. If so, propagate
+ // to prior blocks.
+ int *old_live = &live[b->_pre_order*max_reg_ints];
+ int l;
+ for( l=0; l<max_reg_ints; l++ )
+ if( tmp_live[l] != old_live[l] )
+ break;
+ if( l<max_reg_ints ) { // Change!
+ // Copy in new value
+ for( l=0; l<max_reg_ints; l++ )
+ old_live[l] = tmp_live[l];
+ // Push preds onto worklist
+ for( l=1; l<(int)b->num_preds(); l++ )
+ worklist->push(cfg->_bbs[b->pred(l)->_idx]);
+ }
+ }
+
+    // Scan for any missing safepoints.  Happens with infinite loops
+    // a la ZKM.jar
+ uint i;
+ for( i=1; i<cfg->_num_blocks; i++ ) {
+ Block *b = cfg->_blocks[i];
+ uint j;
+ for( j=1; j<b->_nodes.size(); j++ )
+ if( b->_nodes[j]->jvms() &&
+ (*safehash)[b->_nodes[j]] == NULL )
+ break;
+ if( j<b->_nodes.size() ) break;
+ }
+ if( i == cfg->_num_blocks )
+ break; // Got 'em all
+#ifndef PRODUCT
+ if( PrintOpto && Verbose )
+ tty->print_cr("retripping live calc");
+#endif
+ // Force the issue (expensively): recheck everybody
+ for( i=1; i<cfg->_num_blocks; i++ )
+ worklist->push(cfg->_blocks[i]);
+ }
+
+}
+
+//------------------------------BuildOopMaps-----------------------------------
+// Collect GC mask info - where are all the OOPs?
+void Compile::BuildOopMaps() {
+ NOT_PRODUCT( TracePhase t3("bldOopMaps", &_t_buildOopMaps, TimeCompiler); )
+ // Can't resource-mark because I need to leave all those OopMaps around,
+ // or else I need to resource-mark some arena other than the default.
+ // ResourceMark rm; // Reclaim all OopFlows when done
+ int max_reg = _regalloc->_max_reg; // Current array extent
+
+ Arena *A = Thread::current()->resource_area();
+ Block_List worklist; // Worklist of pending blocks
+
+ int max_reg_ints = round_to(max_reg, BitsPerInt)>>LogBitsPerInt;
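+  // One live bit per register, rounded up to whole ints: this is the row
+  // length of every per-block liveness vector built in do_liveness.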
+  Dict *safehash = NULL;        // Maps each safepoint node to its live-register set
+ // Compute a backwards liveness per register. Needs a bitarray of
+ // #blocks x (#registers, rounded up to ints)
+ safehash = new Dict(cmpkey,hashkey,A);
+ do_liveness( _regalloc, _cfg, &worklist, max_reg_ints, A, safehash );
+ OopFlow *free_list = NULL; // Free, unused
+
+ // Array mapping blocks to completed oopflows
+ OopFlow **flows = NEW_ARENA_ARRAY(A, OopFlow*, _cfg->_num_blocks);
+ memset( flows, 0, _cfg->_num_blocks*sizeof(OopFlow*) );
+
+
+ // Do the first block 'by hand' to prime the worklist
+ Block *entry = _cfg->_blocks[1];
+ OopFlow *rootflow = OopFlow::make(A,max_reg);
+ // Initialize to 'bottom' (not 'top')
+ memset( rootflow->_callees, OptoReg::Bad, max_reg*sizeof(short) );
+ memset( rootflow->_defs , 0, max_reg*sizeof(Node*) );
+ flows[entry->_pre_order] = rootflow;
+
+ // Do the first block 'by hand' to prime the worklist
+ rootflow->_b = entry;
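+  // compute_reach propagates per-register reaching defs through the block;
+  // at each safepoint those defs are combined with the live set recorded in
+  // safehash by do_liveness to build the OopMap (see build_oop_map above).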
+ rootflow->compute_reach( _regalloc, max_reg, safehash );
+ for( uint i=0; i<entry->_num_succs; i++ )
+ worklist.push(entry->_succs[i]);
+
+ // Now worklist contains blocks which have some, but perhaps not all,
+ // predecessors visited.
+ while( worklist.size() ) {
+    // Scan for a block with all predecessors visited, or any random slob
+ // otherwise. All-preds-visited order allows me to recycle OopFlow
+ // structures rapidly and cut down on the memory footprint.
+ // Note: not all predecessors might be visited yet (must happen for
+ // irreducible loops). This is OK, since every live value must have the
+ // SAME reaching def for the block, so any reaching def is OK.
+ uint i;
+
+ Block *b = worklist.pop();
+ // Ignore root block
+ if( b == _cfg->_broot ) continue;
+ // Block is already done? Happens if block has several predecessors,
+    // so it can get on the worklist more than once.
+ if( flows[b->_pre_order] ) continue;
+
+ // If this block has a visited predecessor AND that predecessor has this
+ // last block as his only undone child, we can move the OopFlow from the
+ // pred to this block. Otherwise we have to grab a new OopFlow.
+ OopFlow *flow = NULL; // Flag for finding optimized flow
+ Block *pred = (Block*)0xdeadbeef;
+ uint j;
+ // Scan this block's preds to find a done predecessor
+ for( j=1; j<b->num_preds(); j++ ) {
+ Block *p = _cfg->_bbs[b->pred(j)->_idx];
+ OopFlow *p_flow = flows[p->_pre_order];
+ if( p_flow ) { // Predecessor is done
+ assert( p_flow->_b == p, "cross check" );
+ pred = p; // Record some predecessor
+ // If all successors of p are done except for 'b', then we can carry
+ // p_flow forward to 'b' without copying, otherwise we have to draw
+ // from the free_list and clone data.
+ uint k;
+ for( k=0; k<p->_num_succs; k++ )
+ if( !flows[p->_succs[k]->_pre_order] &&
+ p->_succs[k] != b )
+ break;
+
+ // Either carry-forward the now-unused OopFlow for b's use
+ // or draw a new one from the free list
+ if( k==p->_num_succs ) {
+ flow = p_flow;
+ break; // Found an ideal pred, use him
+ }
+ }
+ }
+
+ if( flow ) {
+ // We have an OopFlow that's the last-use of a predecessor.
+ // Carry it forward.
+ } else { // Draw a new OopFlow from the freelist
+ if( !free_list )
+ free_list = OopFlow::make(A,max_reg);
+ flow = free_list;
+ assert( flow->_b == NULL, "oopFlow is not free" );
+ free_list = flow->_next;
+ flow->_next = NULL;
+
+ // Copy/clone over the data
+ flow->clone(flows[pred->_pre_order], max_reg);
+ }
+
+ // Mark flow for block. Blocks can only be flowed over once,
+ // because after the first time they are guarded from entering
+ // this code again.
+ assert( flow->_b == pred, "have some prior flow" );
+ flow->_b = NULL;
+
+ // Now push flow forward
+ flows[b->_pre_order] = flow;// Mark flow for this block
+ flow->_b = b;
+ flow->compute_reach( _regalloc, max_reg, safehash );
+
+ // Now push children onto worklist
+ for( i=0; i<b->_num_succs; i++ )
+ worklist.push(b->_succs[i]);
+
+ }
+}
diff --git a/src/share/vm/opto/bytecodeInfo.cpp b/src/share/vm/opto/bytecodeInfo.cpp
new file mode 100644
index 000000000..10648c654
--- /dev/null
+++ b/src/share/vm/opto/bytecodeInfo.cpp
@@ -0,0 +1,490 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_bytecodeInfo.cpp.incl"
+
+// These variables are declared in parse1.cpp
+extern int explicit_null_checks_inserted;
+extern int explicit_null_checks_elided;
+extern int explicit_null_checks_inserted_old;
+extern int explicit_null_checks_elided_old;
+extern int nodes_created_old;
+extern int nodes_created;
+extern int methods_parsed_old;
+extern int methods_parsed;
+extern int methods_seen;
+extern int methods_seen_old;
+
+
+//=============================================================================
+//------------------------------InlineTree-------------------------------------
+InlineTree::InlineTree( Compile* c, const InlineTree *caller_tree, ciMethod* callee, JVMState* caller_jvms, int caller_bci, float site_invoke_ratio )
+: C(c), _caller_jvms(caller_jvms),
+ _caller_tree((InlineTree*)caller_tree),
+ _method(callee), _site_invoke_ratio(site_invoke_ratio),
+ _count_inline_bcs(method()->code_size()) {
+ NOT_PRODUCT(_count_inlines = 0;)
+ if (_caller_jvms != NULL) {
+ // Keep a private copy of the caller_jvms:
+ _caller_jvms = new (C) JVMState(caller_jvms->method(), caller_tree->caller_jvms());
+ _caller_jvms->set_bci(caller_jvms->bci());
+ }
+ assert(_caller_jvms->same_calls_as(caller_jvms), "consistent JVMS");
+ assert((caller_tree == NULL ? 0 : caller_tree->inline_depth() + 1) == inline_depth(), "correct (redundant) depth parameter");
+ assert(caller_bci == this->caller_bci(), "correct (redundant) bci parameter");
+ if (UseOldInlining) {
+ // Update hierarchical counts, count_inline_bcs() and count_inlines()
+ InlineTree *caller = (InlineTree *)caller_tree;
+ for( ; caller != NULL; caller = ((InlineTree *)(caller->caller_tree())) ) {
+ caller->_count_inline_bcs += count_inline_bcs();
+ NOT_PRODUCT(caller->_count_inlines++;)
+ }
+ }
+}
+
+InlineTree::InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio)
+: C(c), _caller_jvms(caller_jvms), _caller_tree(NULL),
+ _method(callee_method), _site_invoke_ratio(site_invoke_ratio),
+ _count_inline_bcs(method()->code_size()) {
+ NOT_PRODUCT(_count_inlines = 0;)
+ assert(!UseOldInlining, "do not use for old stuff");
+}
+
+
+
+static void print_indent(int depth) {
+ tty->print(" ");
+ for (int i = depth; i != 0; --i) tty->print(" ");
+}
+
+// positive filter: should the send be inlined?  Returns NULL if yes, or the rejection msg.
+const char* InlineTree::shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const {
+ // Allows targeted inlining
+ if(callee_method->should_inline()) {
+ *wci_result = *(WarmCallInfo::always_hot());
+ if (PrintInlining && Verbose) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined method is hot: ");
+ }
+ return NULL;
+ }
+
+ // positive filter: should send be inlined? returns NULL (--> yes)
+ // or rejection msg
+ int max_size = C->max_inline_size();
+ int size = callee_method->code_size();
+
+ // Check for too many throws (and not too huge)
+ if(callee_method->interpreter_throwout_count() > InlineThrowCount && size < InlineThrowMaxSize ) {
+ wci_result->set_profit(wci_result->profit() * 100);
+ if (PrintInlining && Verbose) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined method with many throws (throws=%d):", callee_method->interpreter_throwout_count());
+ }
+ return NULL;
+ }
+
+ if (!UseOldInlining) {
+ return NULL; // size and frequency are represented in a new way
+ }
+
+ int call_site_count = method()->scale_count(profile.count());
+ int invoke_count = method()->interpreter_invocation_count();
+  assert( invoke_count != 0, "Require invocation count greater than zero");
+ int freq = call_site_count/invoke_count;
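+  // freq is the whole number of executions of this call site per interpreter
+  // invocation of the caller (integer division, so rarely-hit sites become 0).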
+ // bump the max size if the call is frequent
+ if ((freq >= InlineFrequencyRatio) || (call_site_count >= InlineFrequencyCount)) {
+ max_size = C->freq_inline_size();
+ if (size <= max_size && TraceFrequencyInlining) {
+ print_indent(inline_depth());
+ tty->print_cr("Inlined frequent method (freq=%d count=%d):", freq, call_site_count);
+ print_indent(inline_depth());
+ callee_method->print();
+ tty->cr();
+ }
+ } else {
+ // Not hot. Check for medium-sized pre-existing nmethod at cold sites.
+ if (callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode/4)
+ return "already compiled into a medium method";
+ }
+ if (size > max_size) {
+ if (max_size > C->max_inline_size())
+ return "hot method too big";
+ return "too big";
+ }
+ return NULL;
+}
+
+
+// negative filter: should the send NOT be inlined?  Returns NULL if it is ok to inline, or the rejection msg.
+const char* InlineTree::shouldNotInline(ciMethod *callee_method, WarmCallInfo* wci_result) const {
+ // negative filter: should send NOT be inlined? returns NULL (--> inline) or rejection msg
+ if (!UseOldInlining) {
+ const char* fail = NULL;
+ if (callee_method->is_abstract()) fail = "abstract method";
+ // note: we allow ik->is_abstract()
+ if (!callee_method->holder()->is_initialized()) fail = "method holder not initialized";
+ if (callee_method->is_native()) fail = "native method";
+
+ if (fail) {
+ *wci_result = *(WarmCallInfo::always_cold());
+ return fail;
+ }
+
+ if (callee_method->has_unloaded_classes_in_signature()) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ }
+
+ // don't inline exception code unless the top method belongs to an
+ // exception class
+ if (callee_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ ciMethod* top_method = caller_jvms() ? caller_jvms()->of_depth(1)->method() : method();
+ if (!top_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ }
+ }
+
+ if (callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode) {
+ wci_result->set_profit(wci_result->profit() * 0.1);
+ // %%% adjust wci_result->size()?
+ }
+
+ return NULL;
+ }
+
+ // First check all inlining restrictions which are required for correctness
+ if (callee_method->is_abstract()) return "abstract method";
+ // note: we allow ik->is_abstract()
+ if (!callee_method->holder()->is_initialized()) return "method holder not initialized";
+ if (callee_method->is_native()) return "native method";
+ if (callee_method->has_unloaded_classes_in_signature()) return "unloaded signature classes";
+
+ if (callee_method->should_inline()) {
+ // ignore heuristic controls on inlining
+ return NULL;
+ }
+
+ // Now perform checks which are heuristic
+
+ if( callee_method->has_compiled_code() && callee_method->instructions_size() > InlineSmallCode )
+ return "already compiled into a big method";
+
+ // don't inline exception code unless the top method belongs to an
+ // exception class
+ if (caller_tree() != NULL &&
+ callee_method->holder()->is_subclass_of(C->env()->Throwable_klass())) {
+ const InlineTree *top = this;
+ while (top->caller_tree() != NULL) top = top->caller_tree();
+ ciInstanceKlass* k = top->method()->holder();
+ if (!k->is_subclass_of(C->env()->Throwable_klass()))
+ return "exception method";
+ }
+
+ // use frequency-based objections only for non-trivial methods
+ if (callee_method->code_size() <= MaxTrivialSize) return NULL;
+ if (UseInterpreter && !CompileTheWorld) { // don't use counts with -Xcomp or CTW
+ if (!callee_method->has_compiled_code() && !callee_method->was_executed_more_than(0)) return "never executed";
+ if (!callee_method->was_executed_more_than(MIN2(MinInliningThreshold, CompileThreshold >> 1))) return "executed < MinInliningThreshold times";
+ }
+
+ if (callee_method->should_not_inline()) {
+ return "disallowed by CompilerOracle";
+ }
+
+ return NULL;
+}
+
+//-----------------------------try_to_inline-----------------------------------
+// return NULL if ok, reason for not inlining otherwise
+// Relocated from "InliningClosure::try_to_inline"
+const char* InlineTree::try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) {
+ ciMethod* caller_method = method();
+
+ // Old algorithm had funny accumulating BC-size counters
+ if (UseOldInlining && ClipInlining
+ && (int)count_inline_bcs() >= DesiredMethodLimit) {
+ return "size > DesiredMethodLimit";
+ }
+
+ const char *msg = NULL;
+ if ((msg = shouldInline(callee_method, caller_bci, profile, wci_result)) != NULL) return msg;
+ if ((msg = shouldNotInline(callee_method, wci_result)) != NULL) return msg;
+
+ bool is_accessor = InlineAccessors && callee_method->is_accessor();
+
+ // suppress a few checks for accessors and trivial methods
+ if (!is_accessor && callee_method->code_size() > MaxTrivialSize) {
+ // don't inline into giant methods
+ if (C->unique() > (uint)NodeCountInliningCutoff) return "NodeCountInliningCutoff";
+
+ // don't inline unreached call sites
+ if (profile.count() == 0) return "call site not reached";
+ }
+
+ if (!C->do_inlining() && InlineAccessors && !is_accessor) return "not an accessor";
+
+ if( inline_depth() > MaxInlineLevel ) return "inlining too deep";
+ if( method() == callee_method &&
+ inline_depth() > MaxRecursiveInlineLevel ) return "recursively inlining too deep";
+
+ int size = callee_method->code_size();
+
+ if (UseOldInlining && ClipInlining
+ && (int)count_inline_bcs() + size >= DesiredMethodLimit) {
+ return "size > DesiredMethodLimit";
+ }
+
+ // ok, inline this method
+ return NULL;
+}
+
+//------------------------------pass_initial_checks----------------------------
+bool pass_initial_checks(ciMethod* caller_method, int caller_bci, ciMethod* callee_method) {
+ ciInstanceKlass *callee_holder = callee_method ? callee_method->holder() : NULL;
+ // Check if a callee_method was suggested
+ if( callee_method == NULL ) return false;
+ // Check if klass of callee_method is loaded
+ if( !callee_holder->is_loaded() ) return false;
+ if( !callee_holder->is_initialized() ) return false;
+ if( !UseInterpreter || CompileTheWorld /* running Xcomp or CTW */ ) {
+ // Checks that constant pool's call site has been visited
+ // stricter than callee_holder->is_initialized()
+ ciBytecodeStream iter(caller_method);
+ iter.force_bci(caller_bci);
+ int index = iter.get_index_big();
+ if( !caller_method->is_klass_loaded(index, true) ) {
+ return false;
+ }
+ // Try to do constant pool resolution if running Xcomp
+ Bytecodes::Code call_bc = iter.cur_bc();
+ if( !caller_method->check_call(index, call_bc == Bytecodes::_invokestatic) ) {
+ return false;
+ }
+ }
+ // We will attempt to see if a class/field/etc got properly loaded. If it
+ // did not, it may attempt to throw an exception during our probing. Catch
+ // and ignore such exceptions and do not attempt to compile the method.
+ if( callee_method->should_exclude() ) return false;
+
+ return true;
+}
+
+#ifndef PRODUCT
+//------------------------------print_inlining---------------------------------
+// Really, the failure_msg can be a success message also.
+void InlineTree::print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const {
+ print_indent(inline_depth());
+ tty->print("@ %d ", caller_bci);
+ if( callee_method ) callee_method->print_short_name();
+ else tty->print(" callee not monotonic or profiled");
+ tty->print(" %s", (failure_msg ? failure_msg : "inline"));
+ if( Verbose && callee_method ) {
+ const InlineTree *top = this;
+ while( top->caller_tree() != NULL ) { top = top->caller_tree(); }
+ tty->print(" bcs: %d+%d invoked: %d", top->count_inline_bcs(), callee_method->code_size(), callee_method->interpreter_invocation_count());
+ }
+ tty->cr();
+}
+#endif
+
+//------------------------------ok_to_inline-----------------------------------
+WarmCallInfo* InlineTree::ok_to_inline(ciMethod* callee_method, JVMState* jvms, ciCallProfile& profile, WarmCallInfo* initial_wci) {
+ assert(callee_method != NULL, "caller checks for optimized virtual!");
+#ifdef ASSERT
+ // Make sure the incoming jvms has the same information content as me.
+ // This means that we can eventually make this whole class AllStatic.
+ if (jvms->caller() == NULL) {
+ assert(_caller_jvms == NULL, "redundant instance state");
+ } else {
+ assert(_caller_jvms->same_calls_as(jvms->caller()), "redundant instance state");
+ }
+ assert(_method == jvms->method(), "redundant instance state");
+#endif
+ const char *failure_msg = NULL;
+ int caller_bci = jvms->bci();
+ ciMethod *caller_method = jvms->method();
+
+ if( !pass_initial_checks(caller_method, caller_bci, callee_method)) {
+ if( PrintInlining ) {
+ failure_msg = "failed_initial_checks";
+ print_inlining( callee_method, caller_bci, failure_msg);
+ }
+ return NULL;
+ }
+
+ // Check if inlining policy says no.
+ WarmCallInfo wci = *(initial_wci);
+ failure_msg = try_to_inline(callee_method, caller_bci, profile, &wci);
+ if (failure_msg != NULL && C->log() != NULL) {
+ C->log()->begin_elem("inline_fail reason='");
+ C->log()->text("%s", failure_msg);
+ C->log()->end_elem("'");
+ }
+
+#ifndef PRODUCT
+ if (UseOldInlining && InlineWarmCalls
+ && (PrintOpto || PrintOptoInlining || PrintInlining)) {
+ bool cold = wci.is_cold();
+ bool hot = !cold && wci.is_hot();
+ bool old_cold = (failure_msg != NULL);
+ if (old_cold != cold || (Verbose || WizardMode)) {
+ tty->print(" OldInlining= %4s : %s\n WCI=",
+ old_cold ? "cold" : "hot", failure_msg ? failure_msg : "OK");
+ wci.print();
+ }
+ }
+#endif
+ if (UseOldInlining) {
+ if (failure_msg == NULL)
+ wci = *(WarmCallInfo::always_hot());
+ else
+ wci = *(WarmCallInfo::always_cold());
+ }
+ if (!InlineWarmCalls) {
+ if (!wci.is_cold() && !wci.is_hot()) {
+ // Do not inline the warm calls.
+ wci = *(WarmCallInfo::always_cold());
+ }
+ }
+
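+  // At this point wci carries the final temperature: UseOldInlining forces it
+  // to always_hot or always_cold, and without InlineWarmCalls any remaining
+  // warm call has just been demoted to cold.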
+ if (!wci.is_cold()) {
+ // In -UseOldInlining, the failure_msg may also be a success message.
+ if (failure_msg == NULL) failure_msg = "inline (hot)";
+
+ // Inline!
+ if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+ if (UseOldInlining)
+ build_inline_tree_for_callee(callee_method, jvms, caller_bci);
+ if (InlineWarmCalls && !wci.is_hot())
+ return new (C) WarmCallInfo(wci); // copy to heap
+ return WarmCallInfo::always_hot();
+ }
+
+ // Do not inline
+ if (failure_msg == NULL) failure_msg = "too cold to inline";
+ if( PrintInlining ) print_inlining( callee_method, caller_bci, failure_msg);
+ return NULL;
+}
+
+//------------------------------compute_callee_frequency-----------------------
+float InlineTree::compute_callee_frequency( int caller_bci ) const {
+ int count = method()->interpreter_call_site_count(caller_bci);
+ int invcnt = method()->interpreter_invocation_count();
+ float freq = (float)count/(float)invcnt;
+ // Call-site count / interpreter invocation count, scaled recursively.
+ // Always between 0.0 and 1.0. Represents the percentage of the method's
+ // total execution time used at this call site.
+
+ return freq;
+}
+
+//------------------------------build_inline_tree_for_callee-------------------
+InlineTree *InlineTree::build_inline_tree_for_callee( ciMethod* callee_method, JVMState* caller_jvms, int caller_bci) {
+ float recur_frequency = _site_invoke_ratio * compute_callee_frequency(caller_bci);
+ // Attempt inlining.
+ InlineTree* old_ilt = callee_at(caller_bci, callee_method);
+ if (old_ilt != NULL) {
+ return old_ilt;
+ }
+ InlineTree *ilt = new InlineTree( C, this, callee_method, caller_jvms, caller_bci, recur_frequency );
+ _subtrees.append( ilt );
+
+ NOT_PRODUCT( _count_inlines += 1; )
+
+ return ilt;
+}
+
+
+//---------------------------------------callee_at-----------------------------
+InlineTree *InlineTree::callee_at(int bci, ciMethod* callee) const {
+ for (int i = 0; i < _subtrees.length(); i++) {
+ InlineTree* sub = _subtrees.at(i);
+ if (sub->caller_bci() == bci && callee == sub->method()) {
+ return sub;
+ }
+ }
+ return NULL;
+}
+
+
+//------------------------------build_inline_tree_root-------------------------
+InlineTree *InlineTree::build_inline_tree_root() {
+ Compile* C = Compile::current();
+
+ // Root of inline tree
+ InlineTree *ilt = new InlineTree(C, NULL, C->method(), NULL, -1, 1.0F);
+
+ return ilt;
+}
+
+
+//-------------------------find_subtree_from_root-----------------------------
+// Given a jvms, which determines a call chain from the root method,
+// find the corresponding inline tree.
+// Note: This method will be removed or replaced as InlineTree goes away.
+InlineTree* InlineTree::find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found) {
+ InlineTree* iltp = root;
+ uint depth = jvms && jvms->has_method() ? jvms->depth() : 0;
+ for (uint d = 1; d <= depth; d++) {
+ JVMState* jvmsp = jvms->of_depth(d);
+ // Select the corresponding subtree for this bci.
+ assert(jvmsp->method() == iltp->method(), "tree still in sync");
+ ciMethod* d_callee = (d == depth) ? callee : jvms->of_depth(d+1)->method();
+ InlineTree* sub = iltp->callee_at(jvmsp->bci(), d_callee);
+ if (!sub) {
+ if (create_if_not_found && d == depth) {
+ return iltp->build_inline_tree_for_callee(d_callee, jvmsp, jvmsp->bci());
+ }
+ assert(sub != NULL, "should be a sub-ilt here");
+ return NULL;
+ }
+ iltp = sub;
+ }
+ return iltp;
+}
+
+// ----------------------------------------------------------------------------
+#ifndef PRODUCT
+
+static void per_method_stats() {
+ // Compute difference between this method's cumulative totals and old totals
+ int explicit_null_checks_cur = explicit_null_checks_inserted - explicit_null_checks_inserted_old;
+ int elided_null_checks_cur = explicit_null_checks_elided - explicit_null_checks_elided_old;
+
+ // Print differences
+ if( explicit_null_checks_cur )
+ tty->print_cr("XXX Explicit NULL checks inserted: %d", explicit_null_checks_cur);
+ if( elided_null_checks_cur )
+ tty->print_cr("XXX Explicit NULL checks removed at parse time: %d", elided_null_checks_cur);
+
+ // Store the current cumulative totals
+ nodes_created_old = nodes_created;
+ methods_parsed_old = methods_parsed;
+ methods_seen_old = methods_seen;
+ explicit_null_checks_inserted_old = explicit_null_checks_inserted;
+ explicit_null_checks_elided_old = explicit_null_checks_elided;
+}
+
+#endif
diff --git a/src/share/vm/opto/c2_globals.cpp b/src/share/vm/opto/c2_globals.cpp
new file mode 100644
index 000000000..5715b24ba
--- /dev/null
+++ b/src/share/vm/opto/c2_globals.cpp
@@ -0,0 +1,28 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+# include "incls/_precompiled.incl"
+# include "incls/_c2_globals.cpp.incl"
+
+C2_FLAGS(MATERIALIZE_DEVELOPER_FLAG, MATERIALIZE_PD_DEVELOPER_FLAG, MATERIALIZE_PRODUCT_FLAG, MATERIALIZE_PD_PRODUCT_FLAG, MATERIALIZE_DIAGNOSTIC_FLAG, MATERIALIZE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/c2_globals.hpp b/src/share/vm/opto/c2_globals.hpp
new file mode 100644
index 000000000..360300255
--- /dev/null
+++ b/src/share/vm/opto/c2_globals.hpp
@@ -0,0 +1,382 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Defines all globals flags used by the server compiler.
+//
+
+#define C2_FLAGS(develop, develop_pd, product, product_pd, diagnostic, notproduct) \
+ \
+ notproduct(intx, CompileZapFirst, 0, \
+ "If +ZapDeadCompiledLocals, " \
+ "skip this many before compiling in zap calls") \
+ \
+ notproduct(intx, CompileZapLast, -1, \
+ "If +ZapDeadCompiledLocals, " \
+ "compile this many after skipping (incl. skip count, -1 = all)") \
+ \
+ notproduct(intx, ZapDeadCompiledLocalsFirst, 0, \
+ "If +ZapDeadCompiledLocals, " \
+ "skip this many before really doing it") \
+ \
+ notproduct(intx, ZapDeadCompiledLocalsLast, -1, \
+ "If +ZapDeadCompiledLocals, " \
+ "do this many after skipping (incl. skip count, -1 = all)") \
+ \
+ develop(intx, OptoPrologueNops, 0, \
+ "Insert this many extra nop instructions " \
+ "in the prologue of every nmethod") \
+ \
+ product_pd(intx, InteriorEntryAlignment, \
+ "Code alignment for interior entry points " \
+ "in generated code (in bytes)") \
+ \
+ product_pd(intx, OptoLoopAlignment, \
+ "Align inner loops to zero relative to this modulus") \
+ \
+ product(intx, MaxLoopPad, (OptoLoopAlignment-1), \
+ "Align a loop if padding size in bytes is less or equal to this value") \
+ \
+ product(intx, NumberOfLoopInstrToAlign, 4, \
+ "Number of first instructions in a loop to align") \
+ \
+ notproduct(intx, IndexSetWatch, 0, \
+ "Trace all operations on this IndexSet (-1 means all, 0 none)") \
+ \
+ develop(intx, OptoNodeListSize, 4, \
+ "Starting allocation size of Node_List data structures") \
+ \
+ develop(intx, OptoBlockListSize, 8, \
+ "Starting allocation size of Block_List data structures") \
+ \
+ develop(intx, OptoPeepholeAt, -1, \
+ "Apply peephole optimizations to this peephole rule") \
+ \
+ notproduct(bool, PrintIdeal, false, \
+ "Print ideal graph before code generation") \
+ \
+ notproduct(bool, PrintOpto, false, \
+ "Print compiler2 attempts") \
+ \
+ notproduct(bool, PrintOptoInlining, false, \
+ "Print compiler2 inlining decisions") \
+ \
+ notproduct(bool, VerifyOpto, false, \
+ "Apply more time consuming verification during compilation") \
+ \
+ notproduct(bool, VerifyOptoOopOffsets, false, \
+ "Check types of base addresses in field references") \
+ \
+ develop(bool, IdealizedNumerics, false, \
+ "Check performance difference allowing FP " \
+ "associativity and commutativity...") \
+ \
+ develop(bool, OptoBreakpoint, false, \
+ "insert breakpoint at method entry") \
+ \
+ notproduct(bool, OptoBreakpointOSR, false, \
+ "insert breakpoint at osr method entry") \
+ \
+ notproduct(intx, BreakAtNode, 0, \
+ "Break at construction of this Node (either _idx or _debug_idx)") \
+ \
+ notproduct(bool, OptoBreakpointC2R, false, \
+ "insert breakpoint at runtime stub entry") \
+ \
+ notproduct(bool, OptoNoExecute, false, \
+ "Attempt to parse and compile but do not execute generated code") \
+ \
+ notproduct(bool, PrintOptoStatistics, false, \
+ "Print New compiler statistics") \
+ \
+ notproduct(bool, PrintOptoAssembly, false, \
+ "Print New compiler assembly output") \
+ \
+ develop_pd(bool, OptoPeephole, \
+ "Apply peephole optimizations after register allocation") \
+ \
+ develop(bool, OptoRemoveUseless, true, \
+ "Remove useless nodes after parsing") \
+ \
+ notproduct(bool, PrintFrameConverterAssembly, false, \
+ "Print New compiler assembly output for frame converters") \
+ \
+ notproduct(bool, PrintParseStatistics, false, \
+ "Print nodes, transforms and new values made per bytecode parsed")\
+ \
+ notproduct(bool, PrintOptoPeephole, false, \
+ "Print New compiler peephole replacements") \
+ \
+ develop(bool, PrintCFGBlockFreq, false, \
+ "Print CFG block freqencies") \
+ \
+ develop(bool, TraceOptoParse, false, \
+ "Trace bytecode parse and control-flow merge") \
+ \
+ product_pd(intx, LoopUnrollLimit, \
+ "Unroll loop bodies with node count less than this") \
+ \
+ product(intx, LoopUnrollMin, 4, \
+ "Minimum number of unroll loop bodies before checking progress" \
+ "of rounds of unroll,optimize,..") \
+ \
+ develop(intx, UnrollLimitForProfileCheck, 1, \
+ "Don't use profile_trip_cnt() to restrict unrolling until " \
+ "unrolling would push the number of unrolled iterations above " \
+ "UnrollLimitForProfileCheck. A higher value allows more " \
+ "unrolling. Zero acts as a very large value." ) \
+ \
+ product(intx, MultiArrayExpandLimit, 6, \
+ "Maximum number of individual allocations in an inline-expanded " \
+ "multianewarray instruction") \
+ \
+ notproduct(bool, TraceProfileTripCount, false, \
+ "Trace profile loop trip count information") \
+ \
+ develop(bool, OptoCoalesce, true, \
+ "Use Conservative Copy Coalescing in the Register Allocator") \
+ \
+ develop(bool, UseUniqueSubclasses, true, \
+ "Narrow an abstract reference to the unique concrete subclass") \
+ \
+ develop(bool, UseExactTypes, true, \
+ "Use exact types to eliminate array store checks and v-calls") \
+ \
+ product(intx, TrackedInitializationLimit, 50, \
+ "When initializing fields, track up to this many words") \
+ \
+ product(bool, ReduceFieldZeroing, true, \
+ "When initializing fields, try to avoid needless zeroing") \
+ \
+ product(bool, ReduceInitialCardMarks, true, \
+ "When initializing fields, try to avoid needless card marks") \
+ \
+ product(bool, ReduceBulkZeroing, true, \
+ "When bulk-initializing, try to avoid needless zeroing") \
+ \
+ develop_pd(intx, RegisterCostAreaRatio, \
+ "Spill selection in reg allocator: scale area by (X/64K) before " \
+ "adding cost") \
+ \
+ develop_pd(bool, UseCISCSpill, \
+ "Use ADLC supplied cisc instructions during allocation") \
+ \
+ notproduct(bool, VerifyGraphEdges , false, \
+ "Verify Bi-directional Edges") \
+ \
+ notproduct(bool, VerifyDUIterators, true, \
+ "Verify the safety of all iterations of Bi-directional Edges") \
+ \
+ notproduct(bool, VerifyHashTableKeys, true, \
+ "Verify the immutability of keys in the VN hash tables") \
+ \
+ develop_pd(intx, FLOATPRESSURE, \
+ "Number of float LRG's that constitute high register pressure") \
+ \
+ develop_pd(intx, INTPRESSURE, \
+ "Number of integer LRG's that constitute high register pressure") \
+ \
+ notproduct(bool, TraceOptoPipelining, false, \
+ "Trace pipelining information") \
+ \
+ notproduct(bool, TraceOptoOutput, false, \
+ "Trace pipelining information") \
+ \
+ product_pd(bool, OptoScheduling, \
+ "Instruction Scheduling after register allocation") \
+ \
+ product(bool, PartialPeelLoop, true, \
+ "Partial peel (rotate) loops") \
+ \
+ product(intx, PartialPeelNewPhiDelta, 0, \
+ "Additional phis that can be created by partial peeling") \
+ \
+ notproduct(bool, TracePartialPeeling, false, \
+ "Trace partial peeling (loop rotation) information") \
+ \
+ product(bool, PartialPeelAtUnsignedTests, true, \
+ "Partial peel at unsigned tests if no signed test exists") \
+ \
+ product(bool, ReassociateInvariants, true, \
+ "Enable reassociation of expressions with loop invariants.") \
+ \
+ product(bool, LoopUnswitching, true, \
+ "Enable loop unswitching (a form of invariant test hoisting)") \
+ \
+ notproduct(bool, TraceLoopUnswitching, false, \
+ "Trace loop unswitching") \
+ \
+ product(bool, UseSuperWord, true, \
+ "Transform scalar operations into superword operations") \
+ \
+ develop(bool, SuperWordRTDepCheck, false, \
+ "Enable runtime dependency checks.") \
+ \
+ product(bool, TraceSuperWord, false, \
+ "Trace superword transforms") \
+ \
+ product_pd(bool, OptoBundling, \
+ "Generate nops to fill i-cache lines") \
+ \
+ product_pd(intx, ConditionalMoveLimit, \
+ "Limit of ops to make speculative when using CMOVE") \
+ \
+ /* Set BranchOnRegister == false. See 4965987. */ \
+ product(bool, BranchOnRegister, false, \
+ "Use Sparc V9 branch-on-register opcodes") \
+ \
+ develop(bool, SparcV9RegsHiBitsZero, true, \
+ "Assume Sparc V9 I&L registers on V8+ systems are zero-extended") \
+ \
+ develop(intx, PrintIdealGraphLevel, 0, \
+ "Print ideal graph to XML file / network interface. " \
+ "By default attempts to connect to the visualizer on a socket.") \
+ \
+ develop(intx, PrintIdealGraphPort, 4444, \
+ "Ideal graph printer to network port") \
+ \
+ develop(ccstr, PrintIdealGraphAddress, "127.0.0.1", \
+ "IP address to connect to visualizer") \
+ \
+ develop(ccstr, PrintIdealGraphFile, NULL, \
+ "File to dump ideal graph to. If set overrides the " \
+ "use of the network") \
+ \
+ product(bool, UseOldInlining, true, \
+ "Enable the 1.3 inlining strategy") \
+ \
+ product(bool, UseBimorphicInlining, true, \
+ "Profiling based inlining for two receivers") \
+ \
+ product(bool, UseOnlyInlinedBimorphic, true, \
+ "Don't use BimorphicInlining if can't inline a second method") \
+ \
+ product(bool, InsertMemBarAfterArraycopy, true, \
+ "Insert memory barrier after arraycopy call") \
+ \
+ /* controls for tier 1 compilations */ \
+ \
+ develop(bool, Tier1CountInvocations, true, \
+ "Generate code, during tier 1, to update invocation counter") \
+ \
+ product(intx, Tier1Inline, false, \
+ "enable inlining during tier 1") \
+ \
+ product(intx, Tier1MaxInlineSize, 8, \
+ "maximum bytecode size of a method to be inlined, during tier 1") \
+ \
+ product(intx, Tier1FreqInlineSize, 35, \
+ "max bytecode size of a frequent method to be inlined, tier 1") \
+ \
+ develop(intx, ImplicitNullCheckThreshold, 3, \
+ "Don't do implicit null checks if NPE's in a method exceeds limit") \
+ \
+ /* controls for loop optimization */ \
+ product(intx, Tier1LoopOptsCount, 0, \
+ "Set level of loop optimization for tier 1 compiles") \
+ \
+ product(intx, LoopOptsCount, 43, \
+ "Set level of loop optimization for tier 1 compiles") \
+ \
+ /* controls for heat-based inlining */ \
+ \
+ develop(intx, NodeCountInliningCutoff, 18000, \
+ "If parser node generation exceeds limit stop inlining") \
+ \
+ develop(intx, NodeCountInliningStep, 1000, \
+ "Target size of warm calls inlined between optimization passes") \
+ \
+ develop(bool, InlineWarmCalls, false, \
+ "Use a heat-based priority queue to govern inlining") \
+ \
+ develop(intx, HotCallCountThreshold, 999999, \
+ "large numbers of calls (per method invocation) force hotness") \
+ \
+ develop(intx, HotCallProfitThreshold, 999999, \
+ "highly profitable inlining opportunities force hotness") \
+ \
+ develop(intx, HotCallTrivialWork, -1, \
+ "trivial execution time (no larger than this) forces hotness") \
+ \
+ develop(intx, HotCallTrivialSize, -1, \
+ "trivial methods (no larger than this) force calls to be hot") \
+ \
+ develop(intx, WarmCallMinCount, -1, \
+ "number of calls (per method invocation) to enable inlining") \
+ \
+ develop(intx, WarmCallMinProfit, -1, \
+ "number of calls (per method invocation) to enable inlining") \
+ \
+ develop(intx, WarmCallMaxWork, 999999, \
+ "execution time of the largest inlinable method") \
+ \
+ develop(intx, WarmCallMaxSize, 999999, \
+ "size of the largest inlinable method") \
+ \
+ product(intx, MaxNodeLimit, 65000, \
+ "Maximum number of nodes") \
+ \
+ product(intx, NodeLimitFudgeFactor, 1000, \
+ "Fudge Factor for certain optimizations") \
+ \
+ product(bool, UseJumpTables, true, \
+ "Use JumpTables instead of a binary search tree for switches") \
+ \
+ product(bool, UseDivMod, true, \
+ "Use combined DivMod instruction if available") \
+ \
+ product(intx, MinJumpTableSize, 18, \
+ "Minimum number of targets in a generated jump table") \
+ \
+ product(intx, MaxJumpTableSize, 65000, \
+ "Maximum number of targets in a generated jump table") \
+ \
+ product(intx, MaxJumpTableSparseness, 5, \
+ "Maximum sparseness for jumptables") \
+ \
+ product(bool, EliminateLocks, true, \
+ "Coarsen locks when possible") \
+ \
+ notproduct(bool, PrintLockStatistics, false, \
+ "Print precise statistics on the dynamic lock usage") \
+ \
+ diagnostic(bool, PrintPreciseBiasedLockingStatistics, false, \
+ "Print per-lock-site statistics of biased locking in JVM") \
+ \
+ notproduct(bool, PrintEliminateLocks, false, \
+ "Print out when locks are eliminated") \
+ \
+ product(bool, DoEscapeAnalysis, false, \
+ "Perform escape analysis") \
+ \
+ notproduct(bool, PrintEscapeAnalysis, false, \
+ "Print the results of escape analysis") \
+ \
+ product(bool, EliminateAllocations, true, \
+ "Use escape analysis to eliminate allocations") \
+ \
+ product(intx, MaxLabelRootDepth, 1100, \
+ "Maximum times call Label_Root to prevent stack overflow") \
+
+C2_FLAGS(DECLARE_DEVELOPER_FLAG, DECLARE_PD_DEVELOPER_FLAG, DECLARE_PRODUCT_FLAG, DECLARE_PD_PRODUCT_FLAG, DECLARE_DIAGNOSTIC_FLAG, DECLARE_NOTPRODUCT_FLAG)
diff --git a/src/share/vm/opto/c2compiler.cpp b/src/share/vm/opto/c2compiler.cpp
new file mode 100644
index 000000000..6543e692b
--- /dev/null
+++ b/src/share/vm/opto/c2compiler.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_c2compiler.cpp.incl"
+
+
+volatile int C2Compiler::_runtimes = uninitialized;
+
+// register information defined by ADLC
+extern const char register_save_policy[];
+extern const int register_save_type[];
+
+const char* C2Compiler::retry_no_subsuming_loads() {
+ return "retry without subsuming loads";
+}
+void C2Compiler::initialize_runtime() {
+
+ // Check assumptions used while running ADLC
+ Compile::adlc_verification();
+ assert(REG_COUNT <= ConcreteRegisterImpl::number_of_registers, "incompatible register counts");
+
+ for (int i = 0; i < ConcreteRegisterImpl::number_of_registers ; i++ ) {
+ OptoReg::vm2opto[i] = OptoReg::Bad;
+ }
+
+ for( OptoReg::Name i=OptoReg::Name(0); i<OptoReg::Name(REG_COUNT); i = OptoReg::add(i,1) ) {
+ VMReg r = OptoReg::as_VMReg(i);
+ if (r->is_valid()) {
+ OptoReg::vm2opto[r->value()] = i;
+ }
+ }
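+  // vm2opto now maps every valid VMReg back to its OptoReg name; registers
+  // with no OptoReg equivalent keep the OptoReg::Bad sentinel.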
+
+ // Check that runtime and architecture description agree on callee-saved-floats
+ bool callee_saved_floats = false;
+ for( OptoReg::Name i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // Is there a callee-saved float or double?
+ if( register_save_policy[i] == 'E' /* callee-saved */ &&
+ (register_save_type[i] == Op_RegF || register_save_type[i] == Op_RegD) ) {
+ callee_saved_floats = true;
+ }
+ }
+
+ DEBUG_ONLY( Node::init_NodeProperty(); )
+
+ Compile::pd_compiler2_init();
+
+ CompilerThread* thread = CompilerThread::current();
+
+ HandleMark handle_mark(thread);
+
+ OptoRuntime::generate(thread->env());
+
+}
+
+
+void C2Compiler::initialize() {
+
+ // This method can only be called once per C2Compiler object
+ // The first compiler thread that gets here will initialize the
+ // small amount of global state (and runtime stubs) that c2 needs.
+
+  // A race is possible once at startup; after that we're fine
+
+ // Note that this is being called from a compiler thread not the
+ // main startup thread.
+
+ if (_runtimes != initialized) {
+ initialize_runtimes( initialize_runtime, &_runtimes);
+ }
+
+ // Mark this compiler object as ready to roll
+ mark_initialized();
+}
+
+void C2Compiler::compile_method(ciEnv* env,
+ ciMethod* target,
+ int entry_bci) {
+ if (!is_initialized()) {
+ initialize();
+ }
+ bool subsume_loads = true;
+ while (!env->failing()) {
+ // Attempt to compile while subsuming loads into machine instructions.
+ Compile C(env, this, target, entry_bci, subsume_loads);
+
+ // Check result and retry if appropriate.
+ if (C.failure_reason() != NULL) {
+ if (C.failure_reason_is(retry_no_subsuming_loads())) {
+ assert(subsume_loads, "must make progress");
+ subsume_loads = false;
+ continue; // retry
+ }
+ // Pass any other failure reason up to the ciEnv.
+ // Note that serious, irreversible failures are already logged
+ // on the ciEnv via env->record_method_not_compilable().
+ env->record_failure(C.failure_reason());
+ }
+
+ // No retry; just break the loop.
+ break;
+ }
+}
+
+
+void C2Compiler::print_timers() {
+ // do nothing
+}
diff --git a/src/share/vm/opto/c2compiler.hpp b/src/share/vm/opto/c2compiler.hpp
new file mode 100644
index 000000000..dc5851082
--- /dev/null
+++ b/src/share/vm/opto/c2compiler.hpp
@@ -0,0 +1,56 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class C2Compiler : public AbstractCompiler {
+private:
+
+ static void initialize_runtime();
+
+public:
+ // Name
+ const char *name() { return "C2"; }
+
+ static volatile int _runtimes;
+
+#ifdef TIERED
+ virtual bool is_c2() { return true; };
+#endif // TIERED
+
+ // Customization
+ bool needs_adapters () { return true; }
+ bool needs_stubs () { return true; }
+
+ void initialize();
+
+ // Compilation entry point for methods
+ void compile_method(ciEnv* env,
+ ciMethod* target,
+ int entry_bci);
+
+ // sentinel value used to trigger backtracking in compile_method().
+ static const char* retry_no_subsuming_loads();
+
+ // Print compilation timers and statistics
+ void print_timers();
+};
diff --git a/src/share/vm/opto/callGenerator.cpp b/src/share/vm/opto/callGenerator.cpp
new file mode 100644
index 000000000..3131cf6b3
--- /dev/null
+++ b/src/share/vm/opto/callGenerator.cpp
@@ -0,0 +1,744 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_callGenerator.cpp.incl"
+
+CallGenerator::CallGenerator(ciMethod* method) {
+ _method = method;
+}
+
+// Utility function.
+const TypeFunc* CallGenerator::tf() const {
+ return TypeFunc::make(method());
+}
+
+//-----------------------------ParseGenerator---------------------------------
+// Internal class which handles all direct bytecode traversal.
+class ParseGenerator : public InlineCallGenerator {
+private:
+ bool _is_osr;
+ float _expected_uses;
+
+public:
+ ParseGenerator(ciMethod* method, float expected_uses, bool is_osr = false)
+ : InlineCallGenerator(method)
+ {
+ _is_osr = is_osr;
+ _expected_uses = expected_uses;
+ assert(can_parse(method, is_osr), "parse must be possible");
+ }
+
+ // Can we build either an OSR or a regular parser for this method?
+ static bool can_parse(ciMethod* method, int is_osr = false);
+
+ virtual bool is_parse() const { return true; }
+ virtual JVMState* generate(JVMState* jvms);
+ int is_osr() { return _is_osr; }
+
+};
+
+JVMState* ParseGenerator::generate(JVMState* jvms) {
+ Compile* C = Compile::current();
+
+ if (is_osr()) {
+    // The JVMS for an OSR has a single argument (see its TypeFunc).
+ assert(jvms->depth() == 1, "no inline OSR");
+ }
+
+ if (C->failing()) {
+ return NULL; // bailing out of the compile; do not try to parse
+ }
+
+ Parse parser(jvms, method(), _expected_uses);
+ // Grab signature for matching/allocation
+#ifdef ASSERT
+ if (parser.tf() != (parser.depth() == 1 ? C->tf() : tf())) {
+ MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
+ assert(C->env()->system_dictionary_modification_counter_changed(),
+ "Must invalidate if TypeFuncs differ");
+ }
+#endif
+
+ GraphKit& exits = parser.exits();
+
+ if (C->failing()) {
+ while (exits.pop_exception_state() != NULL) ;
+ return NULL;
+ }
+
+ assert(exits.jvms()->same_calls_as(jvms), "sanity");
+
+ // Simply return the exit state of the parser,
+ // augmented by any exceptional states.
+ return exits.transfer_exceptions_into_jvms();
+}
+
+//---------------------------DirectCallGenerator------------------------------
+// Internal class which handles all out-of-line calls w/o receiver type checks.
+class DirectCallGenerator : public CallGenerator {
+public:
+ DirectCallGenerator(ciMethod* method)
+ : CallGenerator(method)
+ {
+ }
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+JVMState* DirectCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ bool is_static = method()->is_static();
+ address target = is_static ? SharedRuntime::get_resolve_static_call_stub()
+ : SharedRuntime::get_resolve_opt_virtual_call_stub();
+
+ if (kit.C->log() != NULL) {
+ kit.C->log()->elem("direct_call bci='%d'", jvms->bci());
+ }
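+  // A static call was given the resolve-static-call stub above; a statically
+  // bound instance call goes through the optimized-virtual stub and is
+  // additionally flagged via set_optimized_virtual() below.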
+
+ CallStaticJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallStaticJavaNode(tf(), target, method(), kit.bci());
+ if (!is_static) {
+ // Make an explicit receiver null_check as part of this call.
+ // Since we share a map with the caller, his JVMS gets adjusted.
+ kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ // And dump it back to the caller, decorated with any exceptions:
+ return kit.transfer_exceptions_into_jvms();
+ }
+ // Mark the call node as virtual, sort of:
+ call->set_optimized_virtual(true);
+ }
+ kit.set_arguments_for_java_call(call);
+ kit.set_edges_for_java_call(call);
+ Node* ret = kit.set_results_for_java_call(call);
+ kit.push_node(method()->return_type()->basic_type(), ret);
+ return kit.transfer_exceptions_into_jvms();
+}
+
+class VirtualCallGenerator : public CallGenerator {
+private:
+ int _vtable_index;
+public:
+ VirtualCallGenerator(ciMethod* method, int vtable_index)
+ : CallGenerator(method), _vtable_index(vtable_index)
+ {
+ assert(vtable_index == methodOopDesc::invalid_vtable_index ||
+ vtable_index >= 0, "either invalid or usable");
+ }
+ virtual bool is_virtual() const { return true; }
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+//--------------------------VirtualCallGenerator------------------------------
+// Internal class which handles all out-of-line calls checking receiver type.
+JVMState* VirtualCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ Node* receiver = kit.argument(0);
+
+ if (kit.C->log() != NULL) {
+ kit.C->log()->elem("virtual_call bci='%d'", jvms->bci());
+ }
+
+ // If the receiver is a constant null, do not torture the system
+ // by attempting to call through it. The compile will proceed
+ // correctly, but may bail out in final_graph_reshaping, because
+ // the call instruction will have a seemingly deficient out-count.
+ // (The bailout says something misleading about an "infinite loop".)
+ if (kit.gvn().type(receiver)->higher_equal(TypePtr::NULL_PTR)) {
+ kit.inc_sp(method()->arg_size()); // restore arguments
+ kit.uncommon_trap(Deoptimization::Reason_null_check,
+ Deoptimization::Action_none,
+ NULL, "null receiver");
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Ideally we would unconditionally do a null check here and let it
+ // be converted to an implicit check based on profile information.
+ // However currently the conversion to implicit null checks in
+ // Block::implicit_null_check() only looks for loads and stores, not calls.
+ ciMethod *caller = kit.method();
+ ciMethodData *caller_md = (caller == NULL) ? NULL : caller->method_data();
+ if (!UseInlineCaches || !ImplicitNullChecks ||
+ ((ImplicitNullCheckThreshold > 0) && caller_md &&
+ (caller_md->trap_count(Deoptimization::Reason_null_check)
+ >= (uint)ImplicitNullCheckThreshold))) {
+ // Make an explicit receiver null_check as part of this call.
+ // Since we share a map with the caller, his JVMS gets adjusted.
+ receiver = kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ // And dump it back to the caller, decorated with any exceptions:
+ return kit.transfer_exceptions_into_jvms();
+ }
+ }
+
+ assert(!method()->is_static(), "virtual call must not be to static");
+ assert(!method()->is_final(), "virtual call should not be to final");
+ assert(!method()->is_private(), "virtual call should not be to private");
+ assert(_vtable_index == methodOopDesc::invalid_vtable_index || !UseInlineCaches,
+ "no vtable calls if +UseInlineCaches ");
+ address target = SharedRuntime::get_resolve_virtual_call_stub();
+ // Normal inline cache used for call
+ CallDynamicJavaNode *call = new (kit.C, tf()->domain()->cnt()) CallDynamicJavaNode(tf(), target, method(), _vtable_index, kit.bci());
+ kit.set_arguments_for_java_call(call);
+ kit.set_edges_for_java_call(call);
+ Node* ret = kit.set_results_for_java_call(call);
+ kit.push_node(method()->return_type()->basic_type(), ret);
+
+ // Represent the effect of an implicit receiver null_check
+ // as part of this call. Since we share a map with the caller,
+ // his JVMS gets adjusted.
+ kit.cast_not_null(receiver);
+ return kit.transfer_exceptions_into_jvms();
+}
+
+bool ParseGenerator::can_parse(ciMethod* m, int is_osr) {
+ // Certain methods cannot be parsed at all:
+ if (!m->can_be_compiled()) return false;
+ if (!m->has_balanced_monitors()) return false;
+ if (m->get_flow_analysis()->failing()) return false;
+
+ // (Methods may bail out for other reasons, after the parser is run.
+ // We try to avoid this, but if forced, we must return (Node*)NULL.
+ // The user of the CallGenerator must check for this condition.)
+ return true;
+}
+
+CallGenerator* CallGenerator::for_inline(ciMethod* m, float expected_uses) {
+ if (!ParseGenerator::can_parse(m)) return NULL;
+ return new ParseGenerator(m, expected_uses);
+}
+
+// As a special case, the JVMS passed to this CallGenerator is
+// for the method execution already in progress, not just the JVMS
+// of the caller. Thus, this CallGenerator cannot be mixed with others!
+CallGenerator* CallGenerator::for_osr(ciMethod* m, int osr_bci) {
+ if (!ParseGenerator::can_parse(m, true)) return NULL;
+ float past_uses = m->interpreter_invocation_count();
+ float expected_uses = past_uses;
+ return new ParseGenerator(m, expected_uses, true);
+}
+
+CallGenerator* CallGenerator::for_direct_call(ciMethod* m) {
+ assert(!m->is_abstract(), "for_direct_call mismatch");
+ return new DirectCallGenerator(m);
+}
+
+CallGenerator* CallGenerator::for_virtual_call(ciMethod* m, int vtable_index) {
+ assert(!m->is_static(), "for_virtual_call mismatch");
+ return new VirtualCallGenerator(m, vtable_index);
+}
+
+
+//---------------------------WarmCallGenerator--------------------------------
+// Internal class which handles initial deferral of inlining decisions.
+class WarmCallGenerator : public CallGenerator {
+ WarmCallInfo* _call_info;
+ CallGenerator* _if_cold;
+ CallGenerator* _if_hot;
+ bool _is_virtual; // caches virtuality of if_cold
+ bool _is_inline; // caches inline-ness of if_hot
+
+public:
+ WarmCallGenerator(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot)
+ : CallGenerator(if_cold->method())
+ {
+ assert(method() == if_hot->method(), "consistent choices");
+ _call_info = ci;
+ _if_cold = if_cold;
+ _if_hot = if_hot;
+ _is_virtual = if_cold->is_virtual();
+ _is_inline = if_hot->is_inline();
+ }
+
+ virtual bool is_inline() const { return _is_inline; }
+ virtual bool is_virtual() const { return _is_virtual; }
+ virtual bool is_deferred() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_warm_call(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot) {
+ return new WarmCallGenerator(ci, if_cold, if_hot);
+}
+
+JVMState* WarmCallGenerator::generate(JVMState* jvms) {
+ Compile* C = Compile::current();
+ if (C->log() != NULL) {
+ C->log()->elem("warm_call bci='%d'", jvms->bci());
+ }
+ jvms = _if_cold->generate(jvms);
+ if (jvms != NULL) {
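+ // Walk the control chain back through CatchProj -> Catch -> Proj to find
+ // the CallJava node the cold-path generator just emitted; if the pattern
+ // does not match, m falls through to top and the site is simply not queued.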
+ Node* m = jvms->map()->control();
+ if (m->is_CatchProj()) m = m->in(0); else m = C->top();
+ if (m->is_Catch()) m = m->in(0); else m = C->top();
+ if (m->is_Proj()) m = m->in(0); else m = C->top();
+ if (m->is_CallJava()) {
+ _call_info->set_call(m->as_Call());
+ _call_info->set_hot_cg(_if_hot);
+#ifndef PRODUCT
+ if (PrintOpto || PrintOptoInlining) {
+ tty->print_cr("Queueing for warm inlining at bci %d:", jvms->bci());
+ tty->print("WCI: ");
+ _call_info->print();
+ }
+#endif
+ _call_info->set_heat(_call_info->compute_heat());
+ C->set_warm_calls(_call_info->insert_into(C->warm_calls()));
+ }
+ }
+ return jvms;
+}
+
+void WarmCallInfo::make_hot() {
+ Compile* C = Compile::current();
+ // Replace the callnode with something better.
+ CallJavaNode* call = this->call()->as_CallJava();
+ ciMethod* method = call->method();
+ int nargs = method->arg_size();
+ JVMState* jvms = call->jvms()->clone_shallow(C);
+ uint size = TypeFunc::Parms + MAX2(2, nargs);
+ SafePointNode* map = new (C, size) SafePointNode(size, jvms);
+ for (uint i1 = 0; i1 < (uint)(TypeFunc::Parms + nargs); i1++) {
+ map->init_req(i1, call->in(i1));
+ }
+ jvms->set_map(map);
+ jvms->set_offsets(map->req());
+ jvms->set_locoff(TypeFunc::Parms);
+ jvms->set_stkoff(TypeFunc::Parms);
+ GraphKit kit(jvms);
+
+ JVMState* new_jvms = _hot_cg->generate(kit.jvms());
+ if (new_jvms == NULL) return; // no change
+ if (C->failing()) return;
+
+ kit.set_jvms(new_jvms);
+ Node* res = C->top();
+ int res_size = method->return_type()->size();
+ if (res_size != 0) {
+ kit.inc_sp(-res_size);
+ res = kit.argument(0);
+ }
+ GraphKit ekit(kit.combine_and_pop_all_exception_states()->jvms());
+
+ // Replace the call:
+ for (DUIterator i = call->outs(); call->has_out(i); i++) {
+ Node* n = call->out(i);
+ Node* nn = NULL; // replacement
+ if (n->is_Proj()) {
+ ProjNode* nproj = n->as_Proj();
+ assert(nproj->_con < (uint)(TypeFunc::Parms + (res_size ? 1 : 0)), "sane proj");
+ if (nproj->_con == TypeFunc::Parms) {
+ nn = res;
+ } else {
+ nn = kit.map()->in(nproj->_con);
+ }
+ if (nproj->_con == TypeFunc::I_O) {
+ for (DUIterator j = nproj->outs(); nproj->has_out(j); j++) {
+ Node* e = nproj->out(j);
+ if (e->Opcode() == Op_CreateEx) {
+ e->replace_by(ekit.argument(0));
+ } else if (e->Opcode() == Op_Catch) {
+ for (DUIterator k = e->outs(); e->has_out(k); k++) {
+ CatchProjNode* p = e->out(k)->as_CatchProj();
+ if (p->is_handler_proj()) {
+ p->replace_by(ekit.control());
+ } else {
+ p->replace_by(kit.control());
+ }
+ }
+ }
+ }
+ }
+ }
+ NOT_PRODUCT(if (!nn) n->dump(2));
+ assert(nn != NULL, "don't know what to do with this user");
+ n->replace_by(nn);
+ }
+}
+
+void WarmCallInfo::make_cold() {
+ // No action: Just dequeue.
+}
+
+
+//------------------------PredictedCallGenerator------------------------------
+// Internal class which handles all out-of-line calls checking receiver type.
+class PredictedCallGenerator : public CallGenerator {
+ ciKlass* _predicted_receiver;
+ CallGenerator* _if_missed;
+ CallGenerator* _if_hit;
+ float _hit_prob;
+
+public:
+ PredictedCallGenerator(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit, float hit_prob)
+ : CallGenerator(if_missed->method())
+ {
+ // The call profile data may predict the hit_prob as extreme as 0 or 1.
+ // Remove the extreme values from the range.
+ if (hit_prob > PROB_MAX) hit_prob = PROB_MAX;
+ if (hit_prob < PROB_MIN) hit_prob = PROB_MIN;
+
+ _predicted_receiver = predicted_receiver;
+ _if_missed = if_missed;
+ _if_hit = if_hit;
+ _hit_prob = hit_prob;
+ }
+
+ virtual bool is_virtual() const { return true; }
+ virtual bool is_inline() const { return _if_hit->is_inline(); }
+ virtual bool is_deferred() const { return _if_hit->is_deferred(); }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator* CallGenerator::for_predicted_call(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit,
+ float hit_prob) {
+ return new PredictedCallGenerator(predicted_receiver, if_missed, if_hit, hit_prob);
+}
+
+
+JVMState* PredictedCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ PhaseGVN& gvn = kit.gvn();
+ // We need an explicit receiver null_check before checking its type.
+ // We share a map with the caller, so his JVMS gets adjusted.
+ Node* receiver = kit.argument(0);
+
+ CompileLog* log = kit.C->log();
+ if (log != NULL) {
+ log->elem("predicted_call bci='%d' klass='%d'",
+ jvms->bci(), log->identify(_predicted_receiver));
+ }
+
+ receiver = kit.null_check_receiver(method());
+ if (kit.stopped()) {
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ Node* exact_receiver = receiver; // will get updated in place...
+ Node* slow_ctl = kit.type_check_receiver(receiver,
+ _predicted_receiver, _hit_prob,
+ &exact_receiver);
+
+ SafePointNode* slow_map = NULL;
+ JVMState* slow_jvms;
+ { PreserveJVMState pjvms(&kit);
+ kit.set_control(slow_ctl);
+ if (!kit.stopped()) {
+ slow_jvms = _if_missed->generate(kit.sync_jvms());
+ assert(slow_jvms != NULL, "miss path must not fail to generate");
+ kit.add_exception_states_from(slow_jvms);
+ kit.set_map(slow_jvms->map());
+ if (!kit.stopped())
+ slow_map = kit.stop();
+ }
+ }
+
+ // fall through if the instance exactly matches the desired type
+ kit.replace_in_map(receiver, exact_receiver);
+
+ // Make the hot call:
+ JVMState* new_jvms = _if_hit->generate(kit.sync_jvms());
+ if (new_jvms == NULL) {
+ // Inline failed, so make a direct call.
+ assert(_if_hit->is_inline(), "must have been a failed inline");
+ CallGenerator* cg = CallGenerator::for_direct_call(_if_hit->method());
+ new_jvms = cg->generate(kit.sync_jvms());
+ }
+ kit.add_exception_states_from(new_jvms);
+ kit.set_jvms(new_jvms);
+
+ // Need to merge slow and fast?
+ if (slow_map == NULL) {
+ // The fast path is the only path remaining.
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ if (kit.stopped()) {
+ // Inlined method threw an exception, so it's just the slow path after all.
+ kit.set_jvms(slow_jvms);
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ // Finish the diamond.
+ kit.C->set_has_split_ifs(true); // Has chance for split-if optimization
+ RegionNode* region = new (kit.C, 3) RegionNode(3);
+ region->init_req(1, kit.control());
+ region->init_req(2, slow_map->control());
+ kit.set_control(gvn.transform(region));
+ Node* iophi = PhiNode::make(region, kit.i_o(), Type::ABIO);
+ iophi->set_req(2, slow_map->i_o());
+ kit.set_i_o(gvn.transform(iophi));
+ kit.merge_memory(slow_map->merged_memory(), region, 2);
+ uint tos = kit.jvms()->stkoff() + kit.sp();
+ uint limit = slow_map->req();
+ for (uint i = TypeFunc::Parms; i < limit; i++) {
+ // Skip unused stack slots; fast forward to monoff();
+ if (i == tos) {
+ i = kit.jvms()->monoff();
+ if( i >= limit ) break;
+ }
+ Node* m = kit.map()->in(i);
+ Node* n = slow_map->in(i);
+ if (m != n) {
+ const Type* t = gvn.type(m)->meet(gvn.type(n));
+ Node* phi = PhiNode::make(region, m, t);
+ phi->set_req(2, n);
+ kit.map()->set_req(i, gvn.transform(phi));
+ }
+ }
+ return kit.transfer_exceptions_into_jvms();
+}
+
+
+//-------------------------UncommonTrapCallGenerator-----------------------------
+// Internal class which replaces the call site with an uncommon trap;
+// control never returns to the caller.
+class UncommonTrapCallGenerator : public CallGenerator {
+ Deoptimization::DeoptReason _reason;
+ Deoptimization::DeoptAction _action;
+
+public:
+ UncommonTrapCallGenerator(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action)
+ : CallGenerator(m)
+ {
+ _reason = reason;
+ _action = action;
+ }
+
+ virtual bool is_virtual() const { ShouldNotReachHere(); return false; }
+ virtual bool is_trap() const { return true; }
+
+ virtual JVMState* generate(JVMState* jvms);
+};
+
+
+CallGenerator*
+CallGenerator::for_uncommon_trap(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action) {
+ return new UncommonTrapCallGenerator(m, reason, action);
+}
+
+
+JVMState* UncommonTrapCallGenerator::generate(JVMState* jvms) {
+ GraphKit kit(jvms);
+ // Take the trap with arguments pushed on the stack. (Cf. null_check_receiver).
+ int nargs = method()->arg_size();
+ kit.inc_sp(nargs);
+ assert(nargs <= kit.sp() && kit.sp() <= jvms->stk_size(), "sane sp w/ args pushed");
+ if (_reason == Deoptimization::Reason_class_check &&
+ _action == Deoptimization::Action_maybe_recompile) {
+ // Temp fix for 6529811
+ // Don't allow uncommon_trap to override our decision to recompile in the event
+ // of a class cast failure for a monomorphic call as it will never let us convert
+ // the call to either bi-morphic or megamorphic and can lead to unc-trap loops
+ bool keep_exact_action = true;
+ kit.uncommon_trap(_reason, _action, NULL, "monomorphic vcall checkcast", false, keep_exact_action);
+ } else {
+ kit.uncommon_trap(_reason, _action);
+ }
+ return kit.transfer_exceptions_into_jvms();
+}
+
+// (Note: Moved hook_up_call to GraphKit::set_edges_for_java_call.)
+
+// (Note: Merged hook_up_exits into ParseGenerator::generate.)
+
+#define NODES_OVERHEAD_PER_METHOD (30.0)
+#define NODES_PER_BYTECODE (9.5)
+
+void WarmCallInfo::init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor) {
+ int call_count = profile.count();
+ int code_size = call_method->code_size();
+
+ // Expected execution count is based on the historical count:
+ _count = call_count < 0 ? 1 : call_site->method()->scale_count(call_count, prof_factor);
+
+ // Expected profit from inlining, in units of simple call-overheads.
+ _profit = 1.0;
+
+ // Expected work performed by the call in units of call-overheads.
+ // %%% need an empirical curve fit for "work" (time in call)
+ float bytecodes_per_call = 3;
+ _work = 1.0 + code_size / bytecodes_per_call;
+
+ // Expected size of compilation graph:
+ // -XX:+PrintParseStatistics once reported:
+ // Methods seen: 9184 Methods parsed: 9184 Nodes created: 1582391
+ // Histogram of 144298 parsed bytecodes:
+ // %%% Need a better predictor for graph size.
+ _size = NODES_OVERHEAD_PER_METHOD + (NODES_PER_BYTECODE * code_size);
+}
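+
+// Illustrative arithmetic only (not a separate heuristic): with the constants
+// above, a hypothetical 100-bytecode callee is estimated at
+// 30.0 + 9.5 * 100 = 980.0 expected graph nodes.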
+
+// is_cold: Return true if the node should never be inlined.
+// This is true if any of the key metrics are extreme.
+bool WarmCallInfo::is_cold() const {
+ if (count() < WarmCallMinCount) return true;
+ if (profit() < WarmCallMinProfit) return true;
+ if (work() > WarmCallMaxWork) return true;
+ if (size() > WarmCallMaxSize) return true;
+ return false;
+}
+
+// is_hot: Return true if the node should be inlined immediately.
+// This is true if any of the key metrics are extreme.
+bool WarmCallInfo::is_hot() const {
+ assert(!is_cold(), "eliminate is_cold cases before testing is_hot");
+ if (count() >= HotCallCountThreshold) return true;
+ if (profit() >= HotCallProfitThreshold) return true;
+ if (work() <= HotCallTrivialWork) return true;
+ if (size() <= HotCallTrivialSize) return true;
+ return false;
+}
+
+// compute_heat:
+float WarmCallInfo::compute_heat() const {
+ assert(!is_cold(), "compute heat only on warm nodes");
+ assert(!is_hot(), "compute heat only on warm nodes");
+ int min_size = MAX2(0, (int)HotCallTrivialSize);
+ int max_size = MIN2(500, (int)WarmCallMaxSize);
+ float method_size = (size() - min_size) / MAX2(1, max_size - min_size);
+ float size_factor;
+ if (method_size < 0.05) size_factor = 4; // 2 sigmas better than avg.
+ else if (method_size < 0.15) size_factor = 2; // 1 sigma better than avg.
+ else if (method_size < 0.5) size_factor = 1; // better than avg.
+ else size_factor = 0.5; // worse than avg.
+ return (count() * profit() * size_factor);
+}
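+
+// Worked example with assumed (hypothetical) tuning values, for illustration
+// only: if HotCallTrivialSize is 10 and WarmCallMaxSize is 2000, then
+// min_size = 10 and max_size = MIN2(500, 2000) = 500; a site with size() = 120
+// gives method_size = (120 - 10) / 490, about 0.22, so size_factor = 1 and
+// the heat is simply count() * profit().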
+
+bool WarmCallInfo::warmer_than(WarmCallInfo* that) {
+ assert(this != that, "compare only different WCIs");
+ assert(this->heat() != 0 && that->heat() != 0, "call compute_heat 1st");
+ if (this->heat() > that->heat()) return true;
+ if (this->heat() < that->heat()) return false;
+ assert(this->heat() == that->heat(), "no NaN heat allowed");
+ // Equal heat. Break the tie some other way.
+ if (!this->call() || !that->call()) return (address)this > (address)that;
+ return this->call()->_idx > that->call()->_idx;
+}
+
+//#define UNINIT_NEXT ((WarmCallInfo*)badAddress)
+#define UNINIT_NEXT ((WarmCallInfo*)NULL)
+
+WarmCallInfo* WarmCallInfo::insert_into(WarmCallInfo* head) {
+ assert(next() == UNINIT_NEXT, "not yet on any list");
+ WarmCallInfo* prev_p = NULL;
+ WarmCallInfo* next_p = head;
+ while (next_p != NULL && next_p->warmer_than(this)) {
+ prev_p = next_p;
+ next_p = prev_p->next();
+ }
+ // Install this between prev_p and next_p.
+ this->set_next(next_p);
+ if (prev_p == NULL)
+ head = this;
+ else
+ prev_p->set_next(this);
+ return head;
+}
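+
+// Typical use, as in WarmCallGenerator::generate above: the compile keeps its
+// pending queue sorted by heat by re-assigning the head on every insert:
+//   C->set_warm_calls(_call_info->insert_into(C->warm_calls()));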
+
+WarmCallInfo* WarmCallInfo::remove_from(WarmCallInfo* head) {
+ WarmCallInfo* prev_p = NULL;
+ WarmCallInfo* next_p = head;
+ while (next_p != this) {
+ assert(next_p != NULL, "this must be in the list somewhere");
+ prev_p = next_p;
+ next_p = prev_p->next();
+ }
+ next_p = this->next();
+ debug_only(this->set_next(UNINIT_NEXT));
+ // Remove this from between prev_p and next_p.
+ if (prev_p == NULL)
+ head = next_p;
+ else
+ prev_p->set_next(next_p);
+ return head;
+}
+
+WarmCallInfo* WarmCallInfo::_always_hot = NULL;
+WarmCallInfo* WarmCallInfo::_always_cold = NULL;
+
+WarmCallInfo* WarmCallInfo::always_hot() {
+ if (_always_hot == NULL) {
+ static double bits[sizeof(WarmCallInfo) / sizeof(double) + 1] = {0};
+ WarmCallInfo* ci = (WarmCallInfo*) bits;
+ ci->_profit = ci->_count = MAX_VALUE();
+ ci->_work = ci->_size = MIN_VALUE();
+ _always_hot = ci;
+ }
+ assert(_always_hot->is_hot(), "must always be hot");
+ return _always_hot;
+}
+
+WarmCallInfo* WarmCallInfo::always_cold() {
+ if (_always_cold == NULL) {
+ static double bits[sizeof(WarmCallInfo) / sizeof(double) + 1] = {0};
+ WarmCallInfo* ci = (WarmCallInfo*) bits;
+ ci->_profit = ci->_count = MIN_VALUE();
+ ci->_work = ci->_size = MAX_VALUE();
+ _always_cold = ci;
+ }
+ assert(_always_cold->is_cold(), "must always be cold");
+ return _always_cold;
+}
+
+
+#ifndef PRODUCT
+
+void WarmCallInfo::print() const {
+ tty->print("%s : C=%6.1f P=%6.1f W=%6.1f S=%6.1f H=%6.1f -> %p",
+ is_cold() ? "cold" : is_hot() ? "hot " : "warm",
+ count(), profit(), work(), size(), compute_heat(), next());
+ tty->cr();
+ if (call() != NULL) call()->dump();
+}
+
+void print_wci(WarmCallInfo* ci) {
+ ci->print();
+}
+
+void WarmCallInfo::print_all() const {
+ for (const WarmCallInfo* p = this; p != NULL; p = p->next())
+ p->print();
+}
+
+int WarmCallInfo::count_all() const {
+ int cnt = 0;
+ for (const WarmCallInfo* p = this; p != NULL; p = p->next())
+ cnt++;
+ return cnt;
+}
+
+#endif //PRODUCT
diff --git a/src/share/vm/opto/callGenerator.hpp b/src/share/vm/opto/callGenerator.hpp
new file mode 100644
index 000000000..bbd47ca4a
--- /dev/null
+++ b/src/share/vm/opto/callGenerator.hpp
@@ -0,0 +1,266 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//---------------------------CallGenerator-------------------------------------
+// The subclasses of this class handle generation of ideal nodes for
+// call sites and method entry points.
+
+class CallGenerator : public ResourceObj {
+ public:
+ enum {
+ xxxunusedxxx
+ };
+
+ private:
+ ciMethod* _method; // The method being called.
+
+ protected:
+ CallGenerator(ciMethod* method);
+
+ public:
+ // Accessors
+ ciMethod* method() const { return _method; }
+
+ // is_inline: At least some code implementing the method is copied here.
+ virtual bool is_inline() const { return false; }
+ // is_intrinsic: There's a method-specific way of generating the inline code.
+ virtual bool is_intrinsic() const { return false; }
+ // is_parse: Bytecodes implementing the specific method are copied here.
+ virtual bool is_parse() const { return false; }
+ // is_virtual: The call uses the receiver type to select or check the method.
+ virtual bool is_virtual() const { return false; }
+ // is_deferred: The decision whether to inline or not is deferred.
+ virtual bool is_deferred() const { return false; }
+ // is_predicted: Uses an explicit check against a predicted type.
+ virtual bool is_predicted() const { return false; }
+ // is_trap: Does not return to the caller. (E.g., uncommon trap.)
+ virtual bool is_trap() const { return false; }
+
+ // Note: It is possible for a CG to be both inline and virtual.
+ // (The hashCode intrinsic does a vtable check and an inlined fast path.)
+
+ // Utilities:
+ const TypeFunc* tf() const;
+
+ // The given jvms has state and arguments for a call to my method.
+ // Edges after jvms->argoff() carry all (pre-popped) argument values.
+ //
+ // Update the map with state and return values (if any) and return it.
+ // The return values (0, 1, or 2) must be pushed on the map's stack,
+ // and the sp of the jvms incremented accordingly.
+ //
+ // The jvms is returned on success. Alternatively, a copy of the
+ // given jvms, suitably updated, may be returned, in which case the
+ // caller should discard the original jvms.
+ //
+ // The non-Parm edges of the returned map will contain updated global state,
+ // and one or two edges before jvms->sp() will carry any return values.
+ // Other map edges may contain locals or monitors, and should not
+ // be changed in meaning.
+ //
+ // If the call traps, the returned map must have a control edge of top.
+ // If the call can throw, the returned map must report has_exceptions().
+ //
+ // If the result is NULL, it means that this CallGenerator was unable
+ // to handle the given call, and another CallGenerator should be consulted.
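+ //
+ // A hedged usage sketch (mirroring PredictedCallGenerator::generate in
+ // callGenerator.cpp; the kit/cg names are the ones used at that call site):
+ //   JVMState* new_jvms = cg->generate(kit.sync_jvms());
+ //   if (new_jvms == NULL)  { /* consult another CallGenerator */ }
+ //   kit.add_exception_states_from(new_jvms);
+ //   kit.set_jvms(new_jvms);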
+ virtual JVMState* generate(JVMState* jvms) = 0;
+
+ // How to generate a call site that is inlined:
+ static CallGenerator* for_inline(ciMethod* m, float expected_uses = -1);
+ // How to generate code for an on-stack replacement handler.
+ static CallGenerator* for_osr(ciMethod* m, int osr_bci);
+
+ // How to generate vanilla out-of-line call sites:
+ static CallGenerator* for_direct_call(ciMethod* m); // static, special
+ static CallGenerator* for_virtual_call(ciMethod* m, int vtable_index); // virtual, interface
+
+ // How to make a call but defer the decision whether to inline or not.
+ static CallGenerator* for_warm_call(WarmCallInfo* ci,
+ CallGenerator* if_cold,
+ CallGenerator* if_hot);
+
+ // How to make a call that optimistically assumes a receiver type:
+ static CallGenerator* for_predicted_call(ciKlass* predicted_receiver,
+ CallGenerator* if_missed,
+ CallGenerator* if_hit,
+ float hit_prob);
+
+ // How to make a call that gives up and goes back to the interpreter:
+ static CallGenerator* for_uncommon_trap(ciMethod* m,
+ Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action);
+
+ // Registry for intrinsics:
+ static CallGenerator* for_intrinsic(ciMethod* m);
+ static void register_intrinsic(ciMethod* m, CallGenerator* cg);
+};
+
+class InlineCallGenerator : public CallGenerator {
+ virtual bool is_inline() const { return true; }
+
+ protected:
+ InlineCallGenerator(ciMethod* method) : CallGenerator(method) { }
+};
+
+
+//---------------------------WarmCallInfo--------------------------------------
+// A struct to collect information about a given call site.
+// Helps sort call sites into "hot", "medium", and "cold".
+// Participates in the queueing of "medium" call sites for possible inlining.
+class WarmCallInfo : public ResourceObj {
+ private:
+
+ CallNode* _call; // The CallNode which may be inlined.
+ CallGenerator* _hot_cg; // CG for expanding the call node
+
+ // These are the metrics we use to evaluate call sites:
+
+ float _count; // How often do we expect to reach this site?
+ float _profit; // How much time do we expect to save by inlining?
+ float _work; // How long do we expect the average call to take?
+ float _size; // How big do we expect the inlined code to be?
+
+ float _heat; // Combined score inducing total order on call sites.
+ WarmCallInfo* _next; // Next cooler call info in pending queue.
+
+ // Count is the number of times this call site is expected to be executed.
+ // Large count is favorable for inlining, because the extra compilation
+ // work will be amortized more completely.
+
+ // Profit is a rough measure of the amount of time we expect to save
+ // per execution of this site if we inline it. (1.0 == call overhead)
+ // Large profit favors inlining. Negative profit disables inlining.
+
+ // Work is a rough measure of the amount of time a typical out-of-line
+ // call from this site is expected to take. (1.0 == call, no-op, return)
+ // Small work is somewhat favorable for inlining, since methods with
+ // short "hot" traces are more likely to inline smoothly.
+
+ // Size is the number of graph nodes we expect this method to produce,
+ // not counting the inlining of any further warm calls it may include.
+ // Small size favors inlining, since small methods are more likely to
+ // inline smoothly. The size is estimated by examining the native code
+ // if available. The method bytecodes are also examined, assuming
+ // empirically observed node counts for each kind of bytecode.
+
+ // Heat is the combined "goodness" of a site's inlining. If we were
+ // omniscient, it would be the difference of two sums of future execution
+ // times of code emitted for this site (amortized across multiple sites if
+ // sharing applies). The two sums are for versions of this call site with
+ // and without inlining.
+
+ // We approximate this mythical quantity by playing with averages,
+ // rough estimates, and assumptions that history repeats itself.
+ // The basic formula count * profit is heuristically adjusted
+ // by looking at the expected compilation and execution times
+ // of the inlined call.
+
+ // Note: Some of these metrics may not be present in the final product,
+ // but exist in development builds to experiment with inline policy tuning.
+
+ // This heuristic framework does not model well the very significant
+ // effects of multiple-level inlining. It is possible to see no immediate
+ // profit from inlining X->Y, but to get great profit from a subsequent
+ // inlining X->Y->Z.
+
+ // This framework does not take well into account the problem of N**2 code
+ // size in a clique of mutually inlinable methods.
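+ //
+ // Concretely (see WarmCallInfo::compute_heat in callGenerator.cpp), the heat
+ // actually computed is count() * profit() * size_factor, where size_factor
+ // steps from 4 down to 0.5 as the normalized size estimate grows.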
+
+ WarmCallInfo* next() const { return _next; }
+ void set_next(WarmCallInfo* n) { _next = n; }
+
+ static WarmCallInfo* _always_hot;
+ static WarmCallInfo* _always_cold;
+
+ public:
+ // Because WarmCallInfo objects live over the entire lifetime of the
+ // Compile object, they are allocated into the comp_arena, which
+ // does not get resource-marked or reset during the compile process.
+ void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
+ void operator delete( void * ) { } // fast deallocation
+
+ static WarmCallInfo* always_hot();
+ static WarmCallInfo* always_cold();
+
+ WarmCallInfo() {
+ _call = NULL;
+ _hot_cg = NULL;
+ _next = NULL;
+ _count = _profit = _work = _size = _heat = 0;
+ }
+
+ CallNode* call() const { return _call; }
+ float count() const { return _count; }
+ float size() const { return _size; }
+ float work() const { return _work; }
+ float profit() const { return _profit; }
+ float heat() const { return _heat; }
+
+ void set_count(float x) { _count = x; }
+ void set_size(float x) { _size = x; }
+ void set_work(float x) { _work = x; }
+ void set_profit(float x) { _profit = x; }
+ void set_heat(float x) { _heat = x; }
+
+ // Load initial heuristics from profiles, etc.
+ // The heuristics can be tweaked further by the caller.
+ void init(JVMState* call_site, ciMethod* call_method, ciCallProfile& profile, float prof_factor);
+
+ static float MAX_VALUE() { return +1.0e10; }
+ static float MIN_VALUE() { return -1.0e10; }
+
+ float compute_heat() const;
+
+ void set_call(CallNode* call) { _call = call; }
+ void set_hot_cg(CallGenerator* cg) { _hot_cg = cg; }
+
+ // Do not queue very hot or very cold calls.
+ // Make very cold ones out of line immediately.
+ // Inline very hot ones immediately.
+ // These queries apply various tunable limits
+ // to the above metrics in a systematic way.
+ // Test for coldness before testing for hotness.
+ bool is_cold() const;
+ bool is_hot() const;
+
+ // Force a warm call to be hot. This worklists the call node for inlining.
+ void make_hot();
+
+ // Force a warm call to be cold. This worklists the call node for out-of-lining.
+ void make_cold();
+
+ // A reproducible total ordering, in which heat is the major key.
+ bool warmer_than(WarmCallInfo* that);
+
+ // List management. These methods are called with the list head,
+ // and return the new list head, inserting or removing the receiver.
+ WarmCallInfo* insert_into(WarmCallInfo* head);
+ WarmCallInfo* remove_from(WarmCallInfo* head);
+
+#ifndef PRODUCT
+ void print() const;
+ void print_all() const;
+ int count_all() const;
+#endif
+};
diff --git a/src/share/vm/opto/callnode.cpp b/src/share/vm/opto/callnode.cpp
new file mode 100644
index 000000000..7fb600440
--- /dev/null
+++ b/src/share/vm/opto/callnode.cpp
@@ -0,0 +1,1311 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_callnode.cpp.incl"
+
+//=============================================================================
+uint StartNode::size_of() const { return sizeof(*this); }
+uint StartNode::cmp( const Node &n ) const
+{ return _domain == ((StartNode&)n)._domain; }
+const Type *StartNode::bottom_type() const { return _domain; }
+const Type *StartNode::Value(PhaseTransform *phase) const { return _domain; }
+#ifndef PRODUCT
+void StartNode::dump_spec(outputStream *st) const { st->print(" #"); _domain->dump_on(st);}
+#endif
+
+//------------------------------Ideal------------------------------------------
+Node *StartNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//------------------------------calling_convention-----------------------------
+void StartNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ Matcher::calling_convention( sig_bt, parm_regs, argcnt, false );
+}
+
+//------------------------------Registers--------------------------------------
+const RegMask &StartNode::in_RegMask(uint) const {
+ return RegMask::Empty;
+}
+
+//------------------------------match------------------------------------------
+// Construct projections for incoming parameters, and their RegMask info
+Node *StartNode::match( const ProjNode *proj, const Matcher *match ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::I_O:
+ case TypeFunc::Memory:
+ return new (match->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+ case TypeFunc::FramePtr:
+ return new (match->C, 1) MachProjNode(this,proj->_con,Matcher::c_frame_ptr_mask, Op_RegP);
+ case TypeFunc::ReturnAdr:
+ return new (match->C, 1) MachProjNode(this,proj->_con,match->_return_addr_mask,Op_RegP);
+ case TypeFunc::Parms:
+ default: {
+ uint parm_num = proj->_con - TypeFunc::Parms;
+ const Type *t = _domain->field_at(proj->_con);
+ if (t->base() == Type::Half) // 2nd half of Longs and Doubles
+ return new (match->C, 1) ConNode(Type::TOP);
+ uint ideal_reg = Matcher::base2reg[t->base()];
+ RegMask &rm = match->_calling_convention_mask[parm_num];
+ return new (match->C, 1) MachProjNode(this,proj->_con,rm,ideal_reg);
+ }
+ }
+ return NULL;
+}
+
+//------------------------------StartOSRNode----------------------------------
+// The method start node for an on stack replacement adapter
+
+//------------------------------osr_domain-----------------------------
+const TypeTuple *StartOSRNode::osr_domain() {
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // address of osr buffer
+
+ return TypeTuple::make(TypeFunc::Parms+1, fields);
+}
+
+//=============================================================================
+const char * const ParmNode::names[TypeFunc::Parms+1] = {
+ "Control", "I_O", "Memory", "FramePtr", "ReturnAdr", "Parms"
+};
+
+#ifndef PRODUCT
+void ParmNode::dump_spec(outputStream *st) const {
+ if( _con < TypeFunc::Parms ) {
+ st->print(names[_con]);
+ } else {
+ st->print("Parm%d: ",_con-TypeFunc::Parms);
+ // Verbose and WizardMode dump bottom_type for all nodes
+ if( !Verbose && !WizardMode ) bottom_type()->dump_on(st);
+ }
+}
+#endif
+
+uint ParmNode::ideal_reg() const {
+ switch( _con ) {
+ case TypeFunc::Control : // fall through
+ case TypeFunc::I_O : // fall through
+ case TypeFunc::Memory : return 0;
+ case TypeFunc::FramePtr : // fall through
+ case TypeFunc::ReturnAdr: return Op_RegP;
+ default : assert( _con > TypeFunc::Parms, "" );
+ // fall through
+ case TypeFunc::Parms : {
+ // Type of argument being passed
+ const Type *t = in(0)->as_Start()->_domain->field_at(_con);
+ return Matcher::base2reg[t->base()];
+ }
+ }
+ ShouldNotReachHere();
+ return 0;
+}
+
+//=============================================================================
+ReturnNode::ReturnNode(uint edges, Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *retadr ) : Node(edges) {
+ init_req(TypeFunc::Control,cntrl);
+ init_req(TypeFunc::I_O,i_o);
+ init_req(TypeFunc::Memory,memory);
+ init_req(TypeFunc::FramePtr,frameptr);
+ init_req(TypeFunc::ReturnAdr,retadr);
+}
+
+Node *ReturnNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+const Type *ReturnNode::Value( PhaseTransform *phase ) const {
+ return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+// Do we Match on this edge index or not? No edges on return nodes
+uint ReturnNode::match_edge(uint idx) const {
+ return 0;
+}
+
+
+#ifndef PRODUCT
+void ReturnNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("returns");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+}
+#endif
+
+//=============================================================================
+RethrowNode::RethrowNode(
+ Node* cntrl,
+ Node* i_o,
+ Node* memory,
+ Node* frameptr,
+ Node* ret_adr,
+ Node* exception
+) : Node(TypeFunc::Parms + 1) {
+ init_req(TypeFunc::Control , cntrl );
+ init_req(TypeFunc::I_O , i_o );
+ init_req(TypeFunc::Memory , memory );
+ init_req(TypeFunc::FramePtr , frameptr );
+ init_req(TypeFunc::ReturnAdr, ret_adr);
+ init_req(TypeFunc::Parms , exception);
+}
+
+Node *RethrowNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+const Type *RethrowNode::Value( PhaseTransform *phase ) const {
+ return (phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+uint RethrowNode::match_edge(uint idx) const {
+ return 0;
+}
+
+#ifndef PRODUCT
+void RethrowNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("exception");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+}
+#endif
+
+//=============================================================================
+// Do we Match on this edge index or not? Match only target address & method
+uint TailCallNode::match_edge(uint idx) const {
+ return TypeFunc::Parms <= idx && idx <= TypeFunc::Parms+1;
+}
+
+//=============================================================================
+// Do we Match on this edge index or not? Match only target address & oop
+uint TailJumpNode::match_edge(uint idx) const {
+ return TypeFunc::Parms <= idx && idx <= TypeFunc::Parms+1;
+}
+
+//=============================================================================
+JVMState::JVMState(ciMethod* method, JVMState* caller) {
+ assert(method != NULL, "must be valid call site");
+ _method = method;
+ debug_only(_bci = -99); // random garbage value
+ debug_only(_map = (SafePointNode*)-1);
+ _caller = caller;
+ _depth = 1 + (caller == NULL ? 0 : caller->depth());
+ _locoff = TypeFunc::Parms;
+ _stkoff = _locoff + _method->max_locals();
+ _monoff = _stkoff + _method->max_stack();
+ _endoff = _monoff;
+ _sp = 0;
+}
+JVMState::JVMState(int stack_size) {
+ _method = NULL;
+ _bci = InvocationEntryBci;
+ debug_only(_map = (SafePointNode*)-1);
+ _caller = NULL;
+ _depth = 1;
+ _locoff = TypeFunc::Parms;
+ _stkoff = _locoff;
+ _monoff = _stkoff + stack_size;
+ _endoff = _monoff;
+ _sp = 0;
+}
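+
+// For reference, the debug-info layout implied by the offsets set up above,
+// for a single frame (caller frames repeat the same pattern at higher offsets):
+//   [locoff, stkoff)  -- locals
+//   [stkoff, monoff)  -- expression stack
+//   [monoff, endoff)  -- monitors (grown later via push_monitor;
+//                        two edges, box and obj, per monitor)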
+
+//--------------------------------of_depth-------------------------------------
+JVMState* JVMState::of_depth(int d) const {
+ const JVMState* jvmp = this;
+ assert(0 < d && (uint)d <= depth(), "oob");
+ for (int skip = depth() - d; skip > 0; skip--) {
+ jvmp = jvmp->caller();
+ }
+ assert(jvmp->depth() == (uint)d, "found the right one");
+ return (JVMState*)jvmp;
+}
+
+//-----------------------------same_calls_as-----------------------------------
+bool JVMState::same_calls_as(const JVMState* that) const {
+ if (this == that) return true;
+ if (this->depth() != that->depth()) return false;
+ const JVMState* p = this;
+ const JVMState* q = that;
+ for (;;) {
+ if (p->_method != q->_method) return false;
+ if (p->_method == NULL) return true; // bci is irrelevant
+ if (p->_bci != q->_bci) return false;
+ p = p->caller();
+ q = q->caller();
+ if (p == q) return true;
+ assert(p != NULL && q != NULL, "depth check ensures we don't run off end");
+ }
+}
+
+//------------------------------debug_start------------------------------------
+uint JVMState::debug_start() const {
+ debug_only(JVMState* jvmroot = of_depth(1));
+ assert(jvmroot->locoff() <= this->locoff(), "youngest JVMState must be last");
+ return of_depth(1)->locoff();
+}
+
+//-------------------------------debug_end-------------------------------------
+uint JVMState::debug_end() const {
+ debug_only(JVMState* jvmroot = of_depth(1));
+ assert(jvmroot->endoff() <= this->endoff(), "youngest JVMState must be last");
+ return endoff();
+}
+
+//------------------------------debug_depth------------------------------------
+uint JVMState::debug_depth() const {
+ uint total = 0;
+ for (const JVMState* jvmp = this; jvmp != NULL; jvmp = jvmp->caller()) {
+ total += jvmp->debug_size();
+ }
+ return total;
+}
+
+//------------------------------format_helper----------------------------------
+// Given an allocation (a Chaitin object) and a Node decide if the Node carries
+// any defined value or not. If it does, print out the register or constant.
+#ifndef PRODUCT
+static void format_helper( PhaseRegAlloc *regalloc, outputStream* st, Node *n, const char *msg, uint i ) {
+ if (n == NULL) { st->print(" NULL"); return; }
+ if( OptoReg::is_valid(regalloc->get_reg_first(n))) { // Check for undefined
+ char buf[50];
+ regalloc->dump_register(n,buf);
+ st->print(" %s%d]=%s",msg,i,buf);
+ } else { // No register, but might be constant
+ const Type *t = n->bottom_type();
+ switch (t->base()) {
+ case Type::Int:
+ st->print(" %s%d]=#"INT32_FORMAT,msg,i,t->is_int()->get_con());
+ break;
+ case Type::AnyPtr:
+ assert( t == TypePtr::NULL_PTR, "" );
+ st->print(" %s%d]=#NULL",msg,i);
+ break;
+ case Type::AryPtr:
+ case Type::KlassPtr:
+ case Type::InstPtr:
+ st->print(" %s%d]=#Ptr" INTPTR_FORMAT,msg,i,t->isa_oopptr()->const_oop());
+ break;
+ case Type::RawPtr:
+ st->print(" %s%d]=#Raw" INTPTR_FORMAT,msg,i,t->is_rawptr());
+ break;
+ case Type::DoubleCon:
+ st->print(" %s%d]=#%fD",msg,i,t->is_double_constant()->_d);
+ break;
+ case Type::FloatCon:
+ st->print(" %s%d]=#%fF",msg,i,t->is_float_constant()->_f);
+ break;
+ case Type::Long:
+ st->print(" %s%d]=#"INT64_FORMAT,msg,i,t->is_long()->get_con());
+ break;
+ case Type::Half:
+ case Type::Top:
+ st->print(" %s%d]=_",msg,i);
+ break;
+ default: ShouldNotReachHere();
+ }
+ }
+}
+#endif
+
+//------------------------------format-----------------------------------------
+#ifndef PRODUCT
+void JVMState::format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const {
+ st->print(" #");
+ if( _method ) {
+ _method->print_short_name(st);
+ st->print(" @ bci:%d ",_bci);
+ } else {
+ st->print_cr(" runtime stub ");
+ return;
+ }
+ if (n->is_MachSafePoint()) {
+ MachSafePointNode *mcall = n->as_MachSafePoint();
+ uint i;
+ // Print locals
+ for( i = 0; i < (uint)loc_size(); i++ )
+ format_helper( regalloc, st, mcall->local(this, i), "L[", i );
+ // Print stack
+ for (i = 0; i < (uint)stk_size(); i++) {
+ if ((uint)(_stkoff + i) >= mcall->len())
+ st->print(" oob ");
+ else
+ format_helper( regalloc, st, mcall->stack(this, i), "STK[", i );
+ }
+ for (i = 0; (int)i < nof_monitors(); i++) {
+ Node *box = mcall->monitor_box(this, i);
+ Node *obj = mcall->monitor_obj(this, i);
+ if ( OptoReg::is_valid(regalloc->get_reg_first(box)) ) {
+ while( !box->is_BoxLock() ) box = box->in(1);
+ format_helper( regalloc, st, box, "MON-BOX[", i );
+ } else {
+ OptoReg::Name box_reg = BoxLockNode::stack_slot(box);
+ st->print(" MON-BOX%d=%s+%d",
+ i,
+ OptoReg::regname(OptoReg::c_frame_pointer),
+ regalloc->reg2offset(box_reg));
+ }
+ format_helper( regalloc, st, obj, "MON-OBJ[", i );
+ }
+ }
+ st->print_cr("");
+ if (caller() != NULL) caller()->format(regalloc, n, st);
+}
+#endif
+
+#ifndef PRODUCT
+void JVMState::dump_spec(outputStream *st) const {
+ if (_method != NULL) {
+ bool printed = false;
+ if (!Verbose) {
+ // The JVMS dumps make really, really long lines.
+ // Take out the most boring parts, which are the package prefixes.
+ char buf[500];
+ stringStream namest(buf, sizeof(buf));
+ _method->print_short_name(&namest);
+ if (namest.count() < sizeof(buf)) {
+ const char* name = namest.base();
+ if (name[0] == ' ') ++name;
+ const char* endcn = strchr(name, ':'); // end of class name
+ if (endcn == NULL) endcn = strchr(name, '(');
+ if (endcn == NULL) endcn = name + strlen(name);
+ while (endcn > name && endcn[-1] != '.' && endcn[-1] != '/')
+ --endcn;
+ st->print(" %s", endcn);
+ printed = true;
+ }
+ }
+ if (!printed)
+ _method->print_short_name(st);
+ st->print(" @ bci:%d",_bci);
+ } else {
+ st->print(" runtime stub");
+ }
+ if (caller() != NULL) caller()->dump_spec(st);
+}
+#endif
+
+#ifndef PRODUCT
+void JVMState::dump_on(outputStream* st) const {
+ if (_map && !((uintptr_t)_map & 1)) {
+ if (_map->len() > _map->req()) { // _map->has_exceptions()
+ Node* ex = _map->in(_map->req()); // _map->next_exception()
+ // skip the first one; it's already being printed
+ while (ex != NULL && ex->len() > ex->req()) {
+ ex = ex->in(ex->req()); // ex->next_exception()
+ ex->dump(1);
+ }
+ }
+ _map->dump(2);
+ }
+ st->print("JVMS depth=%d loc=%d stk=%d mon=%d end=%d mondepth=%d sp=%d bci=%d method=",
+ depth(), locoff(), stkoff(), monoff(), endoff(), monitor_depth(), sp(), bci());
+ if (_method == NULL) {
+ st->print_cr("(none)");
+ } else {
+ _method->print_name(st);
+ st->cr();
+ if (bci() >= 0 && bci() < _method->code_size()) {
+ st->print(" bc: ");
+ _method->print_codes_on(bci(), bci()+1, st);
+ }
+ }
+ if (caller() != NULL) {
+ caller()->dump_on(st);
+ }
+}
+
+// Extra way to dump a jvms from the debugger,
+// to avoid a bug with C++ member function calls.
+void dump_jvms(JVMState* jvms) {
+ jvms->dump();
+}
+#endif
+
+//--------------------------clone_shallow--------------------------------------
+JVMState* JVMState::clone_shallow(Compile* C) const {
+ JVMState* n = has_method() ? new (C) JVMState(_method, _caller) : new (C) JVMState(0);
+ n->set_bci(_bci);
+ n->set_locoff(_locoff);
+ n->set_stkoff(_stkoff);
+ n->set_monoff(_monoff);
+ n->set_endoff(_endoff);
+ n->set_sp(_sp);
+ n->set_map(_map);
+ return n;
+}
+
+//---------------------------clone_deep----------------------------------------
+JVMState* JVMState::clone_deep(Compile* C) const {
+ JVMState* n = clone_shallow(C);
+ for (JVMState* p = n; p->_caller != NULL; p = p->_caller) {
+ p->_caller = p->_caller->clone_shallow(C);
+ }
+ assert(n->depth() == depth(), "sanity");
+ assert(n->debug_depth() == debug_depth(), "sanity");
+ return n;
+}
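+
+// (Note: clone_shallow above copies only the youngest frame and still shares
+// the caller chain; clone_deep re-clones every caller frame so the whole chain
+// can be modified without disturbing the original JVMState.)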
+
+//=============================================================================
+uint CallNode::cmp( const Node &n ) const
+{ return _tf == ((CallNode&)n)._tf && _jvms == ((CallNode&)n)._jvms; }
+#ifndef PRODUCT
+void CallNode::dump_req() const {
+ // Dump the required inputs, enclosed in '(' and ')'
+ uint i; // Exit value of loop
+ for( i=0; i<req(); i++ ) { // For all required inputs
+ if( i == TypeFunc::Parms ) tty->print("(");
+ if( in(i) ) tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ else tty->print("_ ");
+ }
+ tty->print(")");
+}
+
+void CallNode::dump_spec(outputStream *st) const {
+ st->print(" ");
+ tf()->dump_on(st);
+ if (_cnt != COUNT_UNKNOWN) st->print(" C=%f",_cnt);
+ if (jvms() != NULL) jvms()->dump_spec(st);
+}
+#endif
+
+const Type *CallNode::bottom_type() const { return tf()->range(); }
+const Type *CallNode::Value(PhaseTransform *phase) const {
+ if (phase->type(in(0)) == Type::TOP) return Type::TOP;
+ return tf()->range();
+}
+
+//------------------------------calling_convention-----------------------------
+void CallNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ // Use the standard compiler calling convention
+ Matcher::calling_convention( sig_bt, parm_regs, argcnt, true );
+}
+
+
+//------------------------------match------------------------------------------
+// Construct projections for control, I/O, memory-fields, ..., and
+// return result(s) along with their RegMask info
+Node *CallNode::match( const ProjNode *proj, const Matcher *match ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::I_O:
+ case TypeFunc::Memory:
+ return new (match->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+
+ case TypeFunc::Parms+1: // For LONG & DOUBLE returns
+ assert(tf()->_range->field_at(TypeFunc::Parms+1) == Type::HALF, "");
+ // 2nd half of doubles and longs
+ return new (match->C, 1) MachProjNode(this,proj->_con, RegMask::Empty, (uint)OptoReg::Bad);
+
+ case TypeFunc::Parms: { // Normal returns
+ uint ideal_reg = Matcher::base2reg[tf()->range()->field_at(TypeFunc::Parms)->base()];
+ OptoRegPair regs = is_CallRuntime()
+ ? match->c_return_value(ideal_reg,true) // Calls into C runtime
+ : match-> return_value(ideal_reg,true); // Calls into compiled Java code
+ RegMask rm = RegMask(regs.first());
+ if( OptoReg::is_valid(regs.second()) )
+ rm.Insert( regs.second() );
+ return new (match->C, 1) MachProjNode(this,proj->_con,rm,ideal_reg);
+ }
+
+ case TypeFunc::ReturnAdr:
+ case TypeFunc::FramePtr:
+ default:
+ ShouldNotReachHere();
+ }
+ return NULL;
+}
+
+// Do we Match on this edge index or not? Match no edges
+uint CallNode::match_edge(uint idx) const {
+ return 0;
+}
+
+//=============================================================================
+uint CallJavaNode::size_of() const { return sizeof(*this); }
+uint CallJavaNode::cmp( const Node &n ) const {
+ CallJavaNode &call = (CallJavaNode&)n;
+ return CallNode::cmp(call) && _method == call._method;
+}
+#ifndef PRODUCT
+void CallJavaNode::dump_spec(outputStream *st) const {
+ if( _method ) _method->print_short_name(st);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallStaticJavaNode::size_of() const { return sizeof(*this); }
+uint CallStaticJavaNode::cmp( const Node &n ) const {
+ CallStaticJavaNode &call = (CallStaticJavaNode&)n;
+ return CallJavaNode::cmp(call);
+}
+
+//----------------------------uncommon_trap_request----------------------------
+// If this is an uncommon trap, return the request code, else zero.
+int CallStaticJavaNode::uncommon_trap_request() const {
+ if (_name != NULL && !strcmp(_name, "uncommon_trap")) {
+ return extract_uncommon_trap_request(this);
+ }
+ return 0;
+}
+int CallStaticJavaNode::extract_uncommon_trap_request(const Node* call) {
+#ifndef PRODUCT
+ if (!(call->req() > TypeFunc::Parms &&
+ call->in(TypeFunc::Parms) != NULL &&
+ call->in(TypeFunc::Parms)->is_Con())) {
+ assert(_in_dump_cnt != 0, "OK if dumping");
+ tty->print("[bad uncommon trap]");
+ return 0;
+ }
+#endif
+ return call->in(TypeFunc::Parms)->bottom_type()->is_int()->get_con();
+}
+
+#ifndef PRODUCT
+void CallStaticJavaNode::dump_spec(outputStream *st) const {
+ st->print("# Static ");
+ if (_name != NULL) {
+ st->print("%s", _name);
+ int trap_req = uncommon_trap_request();
+ if (trap_req != 0) {
+ char buf[100];
+ st->print("(%s)",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_req));
+ }
+ st->print(" ");
+ }
+ CallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallDynamicJavaNode::size_of() const { return sizeof(*this); }
+uint CallDynamicJavaNode::cmp( const Node &n ) const {
+ CallDynamicJavaNode &call = (CallDynamicJavaNode&)n;
+ return CallJavaNode::cmp(call);
+}
+#ifndef PRODUCT
+void CallDynamicJavaNode::dump_spec(outputStream *st) const {
+ st->print("# Dynamic ");
+ CallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint CallRuntimeNode::size_of() const { return sizeof(*this); }
+uint CallRuntimeNode::cmp( const Node &n ) const {
+ CallRuntimeNode &call = (CallRuntimeNode&)n;
+ return CallNode::cmp(call) && !strcmp(_name,call._name);
+}
+#ifndef PRODUCT
+void CallRuntimeNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ st->print(_name);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//------------------------------calling_convention-----------------------------
+void CallRuntimeNode::calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const {
+ Matcher::c_calling_convention( sig_bt, parm_regs, argcnt );
+}
+
+//=============================================================================
+//------------------------------calling_convention-----------------------------
+
+
+//=============================================================================
+#ifndef PRODUCT
+void CallLeafNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ st->print(_name);
+ CallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+
+void SafePointNode::set_local(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ int loc = jvms->locoff() + idx;
+ if (in(loc)->is_top() && idx > 0 && !c->is_top() ) {
+ // If current local idx is top then local idx - 1 could
+ // be a long/double that needs to be killed since top could
+ // represent the 2nd half of the long/double.
+ uint ideal = in(loc -1)->ideal_reg();
+ if (ideal == Op_RegD || ideal == Op_RegL) {
+ // set other (low index) half to top
+ set_req(loc - 1, in(loc));
+ }
+ }
+ set_req(loc, c);
+}
+
+uint SafePointNode::size_of() const { return sizeof(*this); }
+uint SafePointNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//-------------------------set_next_exception----------------------------------
+void SafePointNode::set_next_exception(SafePointNode* n) {
+ assert(n == NULL || n->Opcode() == Op_SafePoint, "correct value for next_exception");
+ if (len() == req()) {
+ if (n != NULL) add_prec(n);
+ } else {
+ set_prec(req(), n);
+ }
+}
+
+
+//----------------------------next_exception-----------------------------------
+SafePointNode* SafePointNode::next_exception() const {
+ if (len() == req()) {
+ return NULL;
+ } else {
+ Node* n = in(req());
+ assert(n == NULL || n->Opcode() == Op_SafePoint, "no other uses of prec edges");
+ return (SafePointNode*) n;
+ }
+}
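+
+// A minimal caller-side sketch (illustrative idiom, not defined in this file;
+// 'map' stands for some SafePointNode) of walking the exception states
+// recorded on a map:
+//
+//   for (SafePointNode* ex = map->next_exception(); ex != NULL;
+//        ex = ex->next_exception()) {
+//     // each 'ex' carries the JVM state of one exceptional exit
+//   }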
+
+
+//------------------------------Ideal------------------------------------------
+// Skip over any collapsed Regions
+Node *SafePointNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Remove obviously duplicate safepoints
+Node *SafePointNode::Identity( PhaseTransform *phase ) {
+
+ // If you have back to back safepoints, remove one
+ if( in(TypeFunc::Control)->is_SafePoint() )
+ return in(TypeFunc::Control);
+
+ if( in(0)->is_Proj() ) {
+ Node *n0 = in(0)->in(0);
+    // Check if it is a call projection (except Leaf Call)
+ if( n0->is_Catch() ) {
+ n0 = n0->in(0)->in(0);
+ assert( n0->is_Call(), "expect a call here" );
+ }
+ if( n0->is_Call() && n0->as_Call()->guaranteed_safepoint() ) {
+ // Useless Safepoint, so remove it
+ return in(TypeFunc::Control);
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *SafePointNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ if( phase->eqv( in(0), this ) ) return Type::TOP; // Dead infinite loop
+ return Type::CONTROL;
+}
+
+#ifndef PRODUCT
+void SafePointNode::dump_spec(outputStream *st) const {
+ st->print(" SafePoint ");
+}
+#endif
+
+const RegMask &SafePointNode::in_RegMask(uint idx) const {
+ if( idx < TypeFunc::Parms ) return RegMask::Empty;
+ // Values outside the domain represent debug info
+ return *(Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()]);
+}
+const RegMask &SafePointNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+
+void SafePointNode::grow_stack(JVMState* jvms, uint grow_by) {
+ assert((int)grow_by > 0, "sanity");
+ int monoff = jvms->monoff();
+ int endoff = jvms->endoff();
+ assert(endoff == (int)req(), "no other states or debug info after me");
+ Node* top = Compile::current()->top();
+ for (uint i = 0; i < grow_by; i++) {
+ ins_req(monoff, top);
+ }
+ jvms->set_monoff(monoff + grow_by);
+ jvms->set_endoff(endoff + grow_by);
+}
+
+void SafePointNode::push_monitor(const FastLockNode *lock) {
+ // Add a LockNode, which points to both the original BoxLockNode (the
+ // stack space for the monitor) and the Object being locked.
+ const int MonitorEdges = 2;
+ assert(JVMState::logMonitorEdges == exact_log2(MonitorEdges), "correct MonitorEdges");
+ assert(req() == jvms()->endoff(), "correct sizing");
+ if (GenerateSynchronizationCode) {
+ add_req(lock->box_node());
+ add_req(lock->obj_node());
+ } else {
+ add_req(NULL);
+ add_req(NULL);
+ }
+ jvms()->set_endoff(req());
+}
+
+void SafePointNode::pop_monitor() {
+ // Delete last monitor from debug info
+ debug_only(int num_before_pop = jvms()->nof_monitors());
+ const int MonitorEdges = (1<<JVMState::logMonitorEdges);
+ int endoff = jvms()->endoff();
+ int new_endoff = endoff - MonitorEdges;
+ jvms()->set_endoff(new_endoff);
+ while (endoff > new_endoff) del_req(--endoff);
+ assert(jvms()->nof_monitors() == num_before_pop-1, "");
+}
+
+Node *SafePointNode::peek_monitor_box() const {
+ int mon = jvms()->nof_monitors() - 1;
+  assert(mon >= 0, "must have a monitor");
+ return monitor_box(jvms(), mon);
+}
+
+Node *SafePointNode::peek_monitor_obj() const {
+ int mon = jvms()->nof_monitors() - 1;
+  assert(mon >= 0, "must have a monitor");
+ return monitor_obj(jvms(), mon);
+}
+
+// Do we Match on this edge index or not? Match no edges
+uint SafePointNode::match_edge(uint idx) const {
+ if( !needs_polling_address_input() )
+ return 0;
+
+ return (TypeFunc::Parms == idx);
+}
+
+//=============================================================================
+uint AllocateNode::size_of() const { return sizeof(*this); }
+
+AllocateNode::AllocateNode(Compile* C, const TypeFunc *atype,
+ Node *ctrl, Node *mem, Node *abio,
+ Node *size, Node *klass_node, Node *initial_test)
+ : CallNode(atype, NULL, TypeRawPtr::BOTTOM)
+{
+ init_class_id(Class_Allocate);
+ init_flags(Flag_is_macro);
+ Node *topnode = C->top();
+
+ init_req( TypeFunc::Control , ctrl );
+ init_req( TypeFunc::I_O , abio );
+ init_req( TypeFunc::Memory , mem );
+ init_req( TypeFunc::ReturnAdr, topnode );
+ init_req( TypeFunc::FramePtr , topnode );
+ init_req( AllocSize , size);
+ init_req( KlassNode , klass_node);
+ init_req( InitialTest , initial_test);
+ init_req( ALength , topnode);
+ C->add_macro_node(this);
+}
+
+//=============================================================================
+uint AllocateArrayNode::size_of() const { return sizeof(*this); }
+
+//=============================================================================
+uint LockNode::size_of() const { return sizeof(*this); }
+
+// Redundant lock elimination
+//
+// There are various patterns of locking where we release and
+// immediately reacquire a lock in a piece of code where no operations
+// occur in between that would be observable. In those cases we can
+// skip releasing and reacquiring the lock without violating any
+// fairness requirements. Doing this around a loop could cause a lock
+// to be held for a very long time so we concentrate on non-looping
+// control flow. We also require that the operations are fully
+// redundant, meaning that we don't introduce new lock operations on
+// some paths just to be able to eliminate them on others, a la PRE. This
+// would probably require some more extensive graph manipulation to
+// guarantee that the memory edges were all handled correctly.
+//
+// Assuming p is a simple predicate which can't trap in any way and s
+// is a synchronized method consider this code:
+//
+// s();
+// if (p)
+// s();
+// else
+// s();
+// s();
+//
+// 1. The unlocks of the first call to s can be eliminated if the
+// locks inside the then and else branches are eliminated.
+//
+// 2. The unlocks of the then and else branches can be eliminated if
+// the lock of the final call to s is eliminated.
+//
+// Either of these cases subsumes the simple case of sequential control flow
+//
+// Additionally we can eliminate versions without the else case:
+//
+// s();
+// if (p)
+// s();
+// s();
+//
+// 3. In this case we eliminate the unlock of the first s, the lock
+// and unlock in the then case and the lock in the final s.
+//
+// Note also that in all these cases the then/else pieces don't have
+// to be trivial as long as they begin and end with synchronization
+// operations.
+//
+// s();
+// if (p)
+// s();
+// f();
+// s();
+// s();
+//
+// The code will work properly for this case, leaving in the unlock
+// before the call to f and the relock after it.
+//
+// A potentially interesting case which isn't handled here is when the
+// locking is partially redundant.
+//
+// s();
+// if (p)
+// s();
+//
+// This could be eliminated by putting an unlock on the else case and
+// eliminating the first unlock and the lock in the then side.
+// Alternatively the unlock could be moved out of the then side so it
+// was after the merge and the first unlock and second lock
+// eliminated. This might require less manipulation of the memory
+// state to get correct.
+//
+// Additionally we might allow work between an unlock and lock before
+// giving up eliminating the locks. The current code disallows any
+// conditional control flow between these operations. A formulation
+// similar to partial redundancy elimination computing the
+// availability of unlocking and the anticipatability of locking at a
+// program point would allow detection of fully redundant locking with
+// some amount of work in between. I'm not sure how often I really
+// think that would occur though. Most of the cases I've seen
+// indicate it's likely non-trivial work would occur in between.
+// There may be other more complicated constructs where we could
+// eliminate locking but I haven't seen any others appear as hot or
+// interesting.
+//
+// Locking and unlocking have a canonical form in ideal that looks
+// roughly like this:
+//
+// <obj>
+// | \\------+
+// | \ \
+// | BoxLock \
+// | | | \
+// | | \ \
+// | | FastLock
+// | | /
+// | | /
+// | | |
+//
+// Lock
+// |
+// Proj #0
+// |
+// MembarAcquire
+// |
+// Proj #0
+//
+// MembarRelease
+// |
+// Proj #0
+// |
+// Unlock
+// |
+// Proj #0
+//
+//
+// This code proceeds by processing Lock nodes during PhaseIterGVN
+// and searching back through its control for the proper code
+// patterns. Once it finds a set of lock and unlock operations to
+// eliminate, they are marked as eliminatable, which causes the
+// expansion of the Lock and Unlock macro nodes to make the operation a NOP.
+//
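+// The simple sequential case subsumed by cases 1 and 2 above corresponds
+// roughly, at the Java level, to code of the shape (illustrative only; x,
+// a() and b() are placeholders):
+//
+//   synchronized (x) { a(); }
+//   synchronized (x) { b(); }
+//
+// where the unlock ending the first region and the lock starting the second
+// form the redundant pair that gets marked as eliminatable.
+//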
+//=============================================================================
+
+//
+// Utility function to skip over uninteresting control nodes. Nodes skipped are:
+// - copy regions. (These may not have been optimized away yet.)
+// - eliminated locking nodes
+//
+static Node *next_control(Node *ctrl) {
+ if (ctrl == NULL)
+ return NULL;
+ while (1) {
+ if (ctrl->is_Region()) {
+ RegionNode *r = ctrl->as_Region();
+ Node *n = r->is_copy();
+ if (n == NULL)
+ break; // hit a region, return it
+ else
+ ctrl = n;
+ } else if (ctrl->is_Proj()) {
+ Node *in0 = ctrl->in(0);
+ if (in0->is_AbstractLock() && in0->as_AbstractLock()->is_eliminated()) {
+ ctrl = in0->in(0);
+ } else {
+ break;
+ }
+ } else {
+ break; // found an interesting control
+ }
+ }
+ return ctrl;
+}
+//
+// Given a control, see if it's the control projection of an Unlock which
+// is operating on the same object as 'lock'.
+//
+bool AbstractLockNode::find_matching_unlock(const Node* ctrl, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ ProjNode *ctrl_proj = (ctrl->is_Proj()) ? ctrl->as_Proj() : NULL;
+ if (ctrl_proj != NULL && ctrl_proj->_con == TypeFunc::Control) {
+ Node *n = ctrl_proj->in(0);
+ if (n != NULL && n->is_Unlock()) {
+ UnlockNode *unlock = n->as_Unlock();
+ if ((lock->obj_node() == unlock->obj_node()) &&
+ (lock->box_node() == unlock->box_node()) && !unlock->is_eliminated()) {
+ lock_ops.append(unlock);
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+//
+// Find the lock matching an unlock. Returns null if a safepoint
+// or complicated control is encountered first.
+LockNode *AbstractLockNode::find_matching_lock(UnlockNode* unlock) {
+ LockNode *lock_result = NULL;
+ // find the matching lock, or an intervening safepoint
+ Node *ctrl = next_control(unlock->in(0));
+ while (1) {
+ assert(ctrl != NULL, "invalid control graph");
+ assert(!ctrl->is_Start(), "missing lock for unlock");
+ if (ctrl->is_top()) break; // dead control path
+ if (ctrl->is_Proj()) ctrl = ctrl->in(0);
+ if (ctrl->is_SafePoint()) {
+ break; // found a safepoint (may be the lock we are searching for)
+ } else if (ctrl->is_Region()) {
+ // Check for a simple diamond pattern. Punt on anything more complicated
+ if (ctrl->req() == 3 && ctrl->in(1) != NULL && ctrl->in(2) != NULL) {
+ Node *in1 = next_control(ctrl->in(1));
+ Node *in2 = next_control(ctrl->in(2));
+ if (((in1->is_IfTrue() && in2->is_IfFalse()) ||
+ (in2->is_IfTrue() && in1->is_IfFalse())) && (in1->in(0) == in2->in(0))) {
+ ctrl = next_control(in1->in(0)->in(0));
+ } else {
+ break;
+ }
+ } else {
+ break;
+ }
+ } else {
+ ctrl = next_control(ctrl->in(0)); // keep searching
+ }
+ }
+ if (ctrl->is_Lock()) {
+ LockNode *lock = ctrl->as_Lock();
+ if ((lock->obj_node() == unlock->obj_node()) &&
+ (lock->box_node() == unlock->box_node())) {
+ lock_result = lock;
+ }
+ }
+ return lock_result;
+}
+
+// This code corresponds to case 3 above.
+
+bool AbstractLockNode::find_lock_and_unlock_through_if(Node* node, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ Node* if_node = node->in(0);
+ bool if_true = node->is_IfTrue();
+
+ if (if_node->is_If() && if_node->outcnt() == 2 && (if_true || node->is_IfFalse())) {
+ Node *lock_ctrl = next_control(if_node->in(0));
+ if (find_matching_unlock(lock_ctrl, lock, lock_ops)) {
+ Node* lock1_node = NULL;
+ ProjNode* proj = if_node->as_If()->proj_out(!if_true);
+ if (if_true) {
+ if (proj->is_IfFalse() && proj->outcnt() == 1) {
+ lock1_node = proj->unique_out();
+ }
+ } else {
+ if (proj->is_IfTrue() && proj->outcnt() == 1) {
+ lock1_node = proj->unique_out();
+ }
+ }
+ if (lock1_node != NULL && lock1_node->is_Lock()) {
+ LockNode *lock1 = lock1_node->as_Lock();
+ if ((lock->obj_node() == lock1->obj_node()) &&
+ (lock->box_node() == lock1->box_node()) && !lock1->is_eliminated()) {
+ lock_ops.append(lock1);
+ return true;
+ }
+ }
+ }
+ }
+
+ lock_ops.trunc_to(0);
+ return false;
+}
+
+bool AbstractLockNode::find_unlocks_for_region(const RegionNode* region, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops) {
+ // check each control merging at this point for a matching unlock.
+ // in(0) should be self edge so skip it.
+ for (int i = 1; i < (int)region->req(); i++) {
+ Node *in_node = next_control(region->in(i));
+ if (in_node != NULL) {
+ if (find_matching_unlock(in_node, lock, lock_ops)) {
+ // found a match so keep on checking.
+ continue;
+ } else if (find_lock_and_unlock_through_if(in_node, lock, lock_ops)) {
+ continue;
+ }
+
+ // If we fall through to here then it was some kind of node we
+ // don't understand or there wasn't a matching unlock, so give
+ // up trying to merge locks.
+ lock_ops.trunc_to(0);
+ return false;
+ }
+ }
+ return true;
+
+}
+
+#ifndef PRODUCT
+//
+// Create a counter which counts the number of times this lock is acquired
+//
+void AbstractLockNode::create_lock_counter(JVMState* state) {
+ _counter = OptoRuntime::new_named_counter(state, NamedCounter::LockCounter);
+}
+#endif
+
+void AbstractLockNode::set_eliminated() {
+ _eliminate = true;
+#ifndef PRODUCT
+ if (_counter) {
+ // Update the counter to indicate that this lock was eliminated.
+ // The counter update code will stay around even though the
+ // optimizer will eliminate the lock operation itself.
+ _counter->set_tag(NamedCounter::EliminatedLockCounter);
+ }
+#endif
+}
+
+//=============================================================================
+Node *LockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+
+ // perform any generic optimizations first
+ Node *result = SafePointNode::Ideal(phase, can_reshape);
+
+ // Now see if we can optimize away this lock. We don't actually
+ // remove the locking here, we simply set the _eliminate flag which
+ // prevents macro expansion from expanding the lock. Since we don't
+ // modify the graph, the value returned from this function is the
+ // one computed above.
+ if (EliminateLocks && !is_eliminated()) {
+ //
+ // Try lock coarsening
+ //
+ PhaseIterGVN* iter = phase->is_IterGVN();
+ if (iter != NULL) {
+
+ GrowableArray<AbstractLockNode*> lock_ops;
+
+ Node *ctrl = next_control(in(0));
+
+ // now search back for a matching Unlock
+ if (find_matching_unlock(ctrl, this, lock_ops)) {
+        // found an unlock directly preceding this lock. This is the
+        // case of a single lock directly control dependent on a
+        // single unlock, which is the trivial version of case 1 or 2.
+ } else if (ctrl->is_Region() ) {
+ if (find_unlocks_for_region(ctrl->as_Region(), this, lock_ops)) {
+ // found lock preceded by multiple unlocks along all paths
+ // joining at this point which is case 3 in description above.
+ }
+ } else {
+        // see if this lock comes from either half of an if whose
+        // predecessor performs a matching unlock and whose other
+        // half also performs a lock.
+ if (find_lock_and_unlock_through_if(ctrl, this, lock_ops)) {
+ // found unlock splitting to an if with locks on both branches.
+ }
+ }
+
+ if (lock_ops.length() > 0) {
+ // add ourselves to the list of locks to be eliminated.
+ lock_ops.append(this);
+
+ #ifndef PRODUCT
+ if (PrintEliminateLocks) {
+ int locks = 0;
+ int unlocks = 0;
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+ if (lock->Opcode() == Op_Lock) locks++;
+ else unlocks++;
+ if (Verbose) {
+ lock->dump(1);
+ }
+ }
+ tty->print_cr("***Eliminated %d unlocks and %d locks", unlocks, locks);
+ }
+ #endif
+
+ // for each of the identified locks, mark them
+ // as eliminatable
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+
+ // Mark it eliminated to update any counters
+ lock->set_eliminated();
+ }
+ } else if (result != NULL && ctrl->is_Region() &&
+ iter->_worklist.member(ctrl)) {
+ // We weren't able to find any opportunities but the region this
+ // lock is control dependent on hasn't been processed yet so put
+ // this lock back on the worklist so we can check again once any
+ // region simplification has occurred.
+ iter->_worklist.push(this);
+ }
+ }
+ }
+
+ return result;
+}
+
+//=============================================================================
+uint UnlockNode::size_of() const { return sizeof(*this); }
+
+//=============================================================================
+Node *UnlockNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+
+ // perform any generic optimizations first
+ Node * result = SafePointNode::Ideal(phase, can_reshape);
+
+ // Now see if we can optimize away this unlock. We don't actually
+ // remove the unlocking here, we simply set the _eliminate flag which
+ // prevents macro expansion from expanding the unlock. Since we don't
+ // modify the graph, the value returned from this function is the
+ // one computed above.
+ if (EliminateLocks && !is_eliminated()) {
+ //
+    // If we are unlocking an unescaped object, the lock/unlock is unnecessary.
+ // We can eliminate them if there are no safepoints in the locked region.
+ //
+ ConnectionGraph *cgr = Compile::current()->congraph();
+ if (cgr != NULL && cgr->escape_state(obj_node(), phase) == PointsToNode::NoEscape) {
+ GrowableArray<AbstractLockNode*> lock_ops;
+ LockNode *lock = find_matching_lock(this);
+ if (lock != NULL) {
+ lock_ops.append(this);
+ lock_ops.append(lock);
+ // find other unlocks which pair with the lock we found and add them
+ // to the list
+ Node * box = box_node();
+
+ for (DUIterator_Fast imax, i = box->fast_outs(imax); i < imax; i++) {
+ Node *use = box->fast_out(i);
+ if (use->is_Unlock() && use != this) {
+ UnlockNode *unlock1 = use->as_Unlock();
+ if (!unlock1->is_eliminated()) {
+ LockNode *lock1 = find_matching_lock(unlock1);
+ if (lock == lock1)
+ lock_ops.append(unlock1);
+ else if (lock1 == NULL) {
+ // we can't find a matching lock, we must assume the worst
+ lock_ops.trunc_to(0);
+ break;
+ }
+ }
+ }
+ }
+ if (lock_ops.length() > 0) {
+
+ #ifndef PRODUCT
+ if (PrintEliminateLocks) {
+ int locks = 0;
+ int unlocks = 0;
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+ if (lock->Opcode() == Op_Lock) locks++;
+ else unlocks++;
+ if (Verbose) {
+ lock->dump(1);
+ }
+ }
+ tty->print_cr("***Eliminated %d unescaped unlocks and %d unescaped locks", unlocks, locks);
+ }
+ #endif
+
+ // for each of the identified locks, mark them
+ // as eliminatable
+ for (int i = 0; i < lock_ops.length(); i++) {
+ AbstractLockNode* lock = lock_ops.at(i);
+
+ // Mark it eliminated to update any counters
+ lock->set_eliminated();
+ }
+ }
+ }
+ }
+ }
+ return result;
+}
diff --git a/src/share/vm/opto/callnode.hpp b/src/share/vm/opto/callnode.hpp
new file mode 100644
index 000000000..e1e6116b6
--- /dev/null
+++ b/src/share/vm/opto/callnode.hpp
@@ -0,0 +1,814 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+class Chaitin;
+class NamedCounter;
+class MultiNode;
+class SafePointNode;
+class CallNode;
+class CallJavaNode;
+class CallStaticJavaNode;
+class CallDynamicJavaNode;
+class CallRuntimeNode;
+class CallLeafNode;
+class CallLeafNoFPNode;
+class AllocateNode;
+class AllocateArrayNode;
+class LockNode;
+class UnlockNode;
+class JVMState;
+class OopMap;
+class State;
+class StartNode;
+class MachCallNode;
+class FastLockNode;
+
+//------------------------------StartNode--------------------------------------
+// The method start node
+class StartNode : public MultiNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const TypeTuple *_domain;
+ StartNode( Node *root, const TypeTuple *domain ) : MultiNode(2), _domain(domain) {
+ init_class_id(Class_Start);
+ init_flags(Flag_is_block_start);
+ init_req(0,this);
+ init_req(1,root);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const { return TypePtr::BOTTOM; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_reg, uint length ) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return 0; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------StartOSRNode-----------------------------------
+// The method start node for on stack replacement code
+class StartOSRNode : public StartNode {
+public:
+ StartOSRNode( Node *root, const TypeTuple *domain ) : StartNode(root, domain) {}
+ virtual int Opcode() const;
+ static const TypeTuple *osr_domain();
+};
+
+
+//------------------------------ParmNode---------------------------------------
+// Incoming parameters
+class ParmNode : public ProjNode {
+ static const char * const names[TypeFunc::Parms+1];
+public:
+ ParmNode( StartNode *src, uint con ) : ProjNode(src,con) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return (_con == TypeFunc::Control); }
+ virtual uint ideal_reg() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+//------------------------------ReturnNode-------------------------------------
+// Return from subroutine node
+class ReturnNode : public Node {
+public:
+ ReturnNode( uint edges, Node *cntrl, Node *i_o, Node *memory, Node *retadr, Node *frameptr );
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const;
+#ifndef PRODUCT
+ virtual void dump_req() const;
+#endif
+};
+
+
+//------------------------------RethrowNode------------------------------------
+// Rethrow of exception at call site. Ends a procedure before rethrowing;
+// ends the current basic block like a ReturnNode. Restores registers and
+// unwinds stack. Rethrow happens in the caller's method.
+class RethrowNode : public Node {
+ public:
+ RethrowNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *ret_adr, Node *exception );
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint match_edge(uint idx) const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+#ifndef PRODUCT
+ virtual void dump_req() const;
+#endif
+};
+
+
+//------------------------------TailCallNode-----------------------------------
+// Pop stack frame and jump indirect
+class TailCallNode : public ReturnNode {
+public:
+ TailCallNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *retadr, Node *target, Node *moop )
+ : ReturnNode( TypeFunc::Parms+2, cntrl, i_o, memory, frameptr, retadr ) {
+ init_req(TypeFunc::Parms, target);
+ init_req(TypeFunc::Parms+1, moop);
+ }
+
+ virtual int Opcode() const;
+ virtual uint match_edge(uint idx) const;
+};
+
+//------------------------------TailJumpNode-----------------------------------
+// Pop stack frame and jump indirect
+class TailJumpNode : public ReturnNode {
+public:
+ TailJumpNode( Node *cntrl, Node *i_o, Node *memory, Node *frameptr, Node *target, Node *ex_oop)
+ : ReturnNode(TypeFunc::Parms+2, cntrl, i_o, memory, frameptr, Compile::current()->top()) {
+ init_req(TypeFunc::Parms, target);
+ init_req(TypeFunc::Parms+1, ex_oop);
+ }
+
+ virtual int Opcode() const;
+ virtual uint match_edge(uint idx) const;
+};
+
+//-------------------------------JVMState-------------------------------------
+// A linked list of JVMState nodes captures the whole interpreter state,
+// plus GC roots, for all active calls at some call site in this compilation
+// unit. (If there is no inlining, then the list has exactly one link.)
+// This provides a way to map the optimized program back into the interpreter,
+// or to let the GC mark the stack.
+class JVMState : public ResourceObj {
+private:
+ JVMState* _caller; // List pointer for forming scope chains
+  uint              _depth;       // One more than caller depth, or one.
+ uint _locoff; // Offset to locals in input edge mapping
+ uint _stkoff; // Offset to stack in input edge mapping
+ uint _monoff; // Offset to monitors in input edge mapping
+ uint _endoff; // Offset to end of input edge mapping
+  uint              _sp;          // Java Expression Stack Pointer for this state
+ int _bci; // Byte Code Index of this JVM point
+ ciMethod* _method; // Method Pointer
+ SafePointNode* _map; // Map node associated with this scope
+public:
+ friend class Compile;
+
+ // Because JVMState objects live over the entire lifetime of the
+ // Compile object, they are allocated into the comp_arena, which
+ // does not get resource marked or reset during the compile process
+ void *operator new( size_t x, Compile* C ) { return C->comp_arena()->Amalloc(x); }
+ void operator delete( void * ) { } // fast deallocation
+
+ // Create a new JVMState, ready for abstract interpretation.
+ JVMState(ciMethod* method, JVMState* caller);
+ JVMState(int stack_size); // root state; has a null method
+
+ // Access functions for the JVM
+ uint locoff() const { return _locoff; }
+ uint stkoff() const { return _stkoff; }
+ uint argoff() const { return _stkoff + _sp; }
+ uint monoff() const { return _monoff; }
+ uint endoff() const { return _endoff; }
+ uint oopoff() const { return debug_end(); }
+
+ int loc_size() const { return _stkoff - _locoff; }
+ int stk_size() const { return _monoff - _stkoff; }
+ int mon_size() const { return _endoff - _monoff; }
+
+ bool is_loc(uint i) const { return i >= _locoff && i < _stkoff; }
+ bool is_stk(uint i) const { return i >= _stkoff && i < _monoff; }
+ bool is_mon(uint i) const { return i >= _monoff && i < _endoff; }
+
+ uint sp() const { return _sp; }
+ int bci() const { return _bci; }
+ bool has_method() const { return _method != NULL; }
+ ciMethod* method() const { assert(has_method(), ""); return _method; }
+ JVMState* caller() const { return _caller; }
+ SafePointNode* map() const { return _map; }
+ uint depth() const { return _depth; }
+ uint debug_start() const; // returns locoff of root caller
+ uint debug_end() const; // returns endoff of self
+ uint debug_size() const { return loc_size() + sp() + mon_size(); }
+ uint debug_depth() const; // returns sum of debug_size values at all depths
+
+ // Returns the JVM state at the desired depth (1 == root).
+ JVMState* of_depth(int d) const;
+
+ // Tells if two JVM states have the same call chain (depth, methods, & bcis).
+ bool same_calls_as(const JVMState* that) const;
+
+  // Monitors (monitors are stored as (boxNode, objNode) pairs)
+ enum { logMonitorEdges = 1 };
+ int nof_monitors() const { return mon_size() >> logMonitorEdges; }
+ int monitor_depth() const { return nof_monitors() + (caller() ? caller()->monitor_depth() : 0); }
+ int monitor_box_offset(int idx) const { return monoff() + (idx << logMonitorEdges) + 0; }
+ int monitor_obj_offset(int idx) const { return monoff() + (idx << logMonitorEdges) + 1; }
+ bool is_monitor_box(uint off) const {
+ assert(is_mon(off), "should be called only for monitor edge");
+ return (0 == bitfield(off - monoff(), 0, logMonitorEdges));
+ }
+ bool is_monitor_use(uint off) const { return (is_mon(off)
+ && is_monitor_box(off))
+ || (caller() && caller()->is_monitor_use(off)); }
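+
+  // Monitor layout sketch (follows directly from the accessors above): with
+  // logMonitorEdges == 1 each monitor occupies two consecutive input edges,
+  //
+  //   box of monitor i -> monoff() + 2*i      (monitor_box_offset(i))
+  //   obj of monitor i -> monoff() + 2*i + 1  (monitor_obj_offset(i))
+  //
+  // so nof_monitors() == mon_size() >> 1.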
+
+ // Initialization functions for the JVM
+ void set_locoff(uint off) { _locoff = off; }
+ void set_stkoff(uint off) { _stkoff = off; }
+ void set_monoff(uint off) { _monoff = off; }
+ void set_endoff(uint off) { _endoff = off; }
+ void set_offsets(uint off) { _locoff = _stkoff = _monoff = _endoff = off; }
+ void set_map(SafePointNode *map) { _map = map; }
+ void set_sp(uint sp) { _sp = sp; }
+ void set_bci(int bci) { _bci = bci; }
+
+ // Miscellaneous utility functions
+ JVMState* clone_deep(Compile* C) const; // recursively clones caller chain
+ JVMState* clone_shallow(Compile* C) const; // retains uncloned caller
+
+#ifndef PRODUCT
+ void format(PhaseRegAlloc *regalloc, const Node *n, outputStream* st) const;
+ void dump_spec(outputStream *st) const;
+ void dump_on(outputStream* st) const;
+ void dump() const {
+ dump_on(tty);
+ }
+#endif
+};
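+
+// A minimal sketch (illustrative idiom; 'youngest' stands for the innermost
+// JVMState of interest) of walking an inline scope chain:
+//
+//   for (JVMState* jvms = youngest; jvms != NULL; jvms = jvms->caller()) {
+//     // jvms->bci(), and jvms->method() when has_method(), identify one
+//     // frame; depth() == 1 at the root of the chain
+//   }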
+
+//------------------------------SafePointNode----------------------------------
+// A SafePointNode is a subclass of a MultiNode for convenience (and
+// potential code sharing) only - conceptually it is independent of
+// the Node semantics.
+class SafePointNode : public MultiNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+
+public:
+ SafePointNode(uint edges, JVMState* jvms,
+ // A plain safepoint advertises no memory effects (NULL):
+ const TypePtr* adr_type = NULL)
+ : MultiNode( edges ),
+ _jvms(jvms),
+ _oop_map(NULL),
+ _adr_type(adr_type)
+ {
+ init_class_id(Class_SafePoint);
+ }
+
+ OopMap* _oop_map; // Array of OopMap info (8-bit char) for GC
+ JVMState* const _jvms; // Pointer to list of JVM State objects
+ const TypePtr* _adr_type; // What type of memory does this node produce?
+
+ // Many calls take *all* of memory as input,
+ // but some produce a limited subset of that memory as output.
+ // The adr_type reports the call's behavior as a store, not a load.
+
+ virtual JVMState* jvms() const { return _jvms; }
+ void set_jvms(JVMState* s) {
+ *(JVMState**)&_jvms = s; // override const attribute in the accessor
+ }
+ OopMap *oop_map() const { return _oop_map; }
+ void set_oop_map(OopMap *om) { _oop_map = om; }
+
+ // Functionality from old debug nodes which has changed
+ Node *local(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->locoff() + idx);
+ }
+ Node *stack(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->stkoff() + idx);
+ }
+ Node *argument(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->argoff() + idx);
+ }
+ Node *monitor_box(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->monitor_box_offset(idx));
+ }
+ Node *monitor_obj(JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(jvms->monitor_obj_offset(idx));
+ }
+
+ void set_local(JVMState* jvms, uint idx, Node *c);
+
+ void set_stack(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(jvms->stkoff() + idx, c);
+ }
+ void set_argument(JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(jvms->argoff() + idx, c);
+ }
+ void ensure_stack(JVMState* jvms, uint stk_size) {
+ assert(verify_jvms(jvms), "jvms must match");
+ int grow_by = (int)stk_size - (int)jvms->stk_size();
+ if (grow_by > 0) grow_stack(jvms, grow_by);
+ }
+ void grow_stack(JVMState* jvms, uint grow_by);
+ // Handle monitor stack
+ void push_monitor( const FastLockNode *lock );
+ void pop_monitor ();
+ Node *peek_monitor_box() const;
+ Node *peek_monitor_obj() const;
+
+ // Access functions for the JVM
+ Node *control () const { return in(TypeFunc::Control ); }
+ Node *i_o () const { return in(TypeFunc::I_O ); }
+ Node *memory () const { return in(TypeFunc::Memory ); }
+ Node *returnadr() const { return in(TypeFunc::ReturnAdr); }
+ Node *frameptr () const { return in(TypeFunc::FramePtr ); }
+
+ void set_control ( Node *c ) { set_req(TypeFunc::Control,c); }
+ void set_i_o ( Node *c ) { set_req(TypeFunc::I_O ,c); }
+ void set_memory ( Node *c ) { set_req(TypeFunc::Memory ,c); }
+
+ MergeMemNode* merged_memory() const {
+ return in(TypeFunc::Memory)->as_MergeMem();
+ }
+
+ // The parser marks useless maps as dead when it's done with them:
+ bool is_killed() { return in(TypeFunc::Control) == NULL; }
+
+ // Exception states bubbling out of subgraphs such as inlined calls
+ // are recorded here. (There might be more than one, hence the "next".)
+ // This feature is used only for safepoints which serve as "maps"
+ // for JVM states during parsing, intrinsic expansion, etc.
+ SafePointNode* next_exception() const;
+ void set_next_exception(SafePointNode* n);
+ bool has_exceptions() const { return next_exception() != NULL; }
+
+ // Standard Node stuff
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const TypePtr *adr_type() const { return _adr_type; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return 0; }
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const;
+ virtual uint match_edge(uint idx) const;
+
+ static bool needs_polling_address_input();
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
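+
+// A minimal sketch (illustrative; 'map' stands for some SafePointNode) of
+// reading the interpreter-visible locals recorded on a map through its
+// JVMState offsets:
+//
+//   JVMState* jvms = map->jvms();
+//   for (uint i = 0; i < (uint)jvms->loc_size(); i++) {
+//     Node* l = map->local(jvms, i);  // top marks a dead slot or the second
+//                                     // half of a long/double
+//   }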
+
+//------------------------------CallNode---------------------------------------
+// Call nodes now subsume the function of debug nodes at callsites, so they
+// contain the functionality of a full scope chain of debug nodes.
+class CallNode : public SafePointNode {
+public:
+ const TypeFunc *_tf; // Function type
+ address _entry_point; // Address of method being called
+ float _cnt; // Estimate of number of times called
+ PointsToNode::EscapeState _escape_state;
+
+ CallNode(const TypeFunc* tf, address addr, const TypePtr* adr_type)
+ : SafePointNode(tf->domain()->cnt(), NULL, adr_type),
+ _tf(tf),
+ _entry_point(addr),
+ _cnt(COUNT_UNKNOWN)
+ {
+ init_class_id(Class_Call);
+ init_flags(Flag_is_Call);
+ _escape_state = PointsToNode::UnknownEscape;
+ }
+
+ const TypeFunc* tf() const { return _tf; }
+ const address entry_point() const { return _entry_point; }
+ const float cnt() const { return _cnt; }
+
+ void set_tf(const TypeFunc* tf) { _tf = tf; }
+ void set_entry_point(address p) { _entry_point = p; }
+ void set_cnt(float c) { _cnt = c; }
+
+ virtual const Type *bottom_type() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const = 0;
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ // Are we guaranteed that this node is a safepoint? Not true for leaf calls and
+ // for some macro nodes whose expansion does not have a safepoint on the fast path.
+ virtual bool guaranteed_safepoint() { return true; }
+ // For macro nodes, the JVMState gets modified during expansion, so when cloning
+ // the node the JVMState must be cloned.
+ virtual void clone_jvms() { } // default is not to clone
+
+ virtual uint match_edge(uint idx) const;
+
+#ifndef PRODUCT
+ virtual void dump_req() const;
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallJavaNode-----------------------------------
+// Make a static or dynamic subroutine call node using Java calling
+// convention. (The "Java" calling convention is the compiler's calling
+// convention, as opposed to the interpreter's or that of native C.)
+class CallJavaNode : public CallNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+
+ bool _optimized_virtual;
+ ciMethod* _method; // Method being direct called
+public:
+ const int _bci; // Byte Code Index of call byte code
+ CallJavaNode(const TypeFunc* tf , address addr, ciMethod* method, int bci)
+ : CallNode(tf, addr, TypePtr::BOTTOM),
+ _method(method), _bci(bci), _optimized_virtual(false)
+ {
+ init_class_id(Class_CallJava);
+ }
+
+ virtual int Opcode() const;
+ ciMethod* method() const { return _method; }
+ void set_method(ciMethod *m) { _method = m; }
+ void set_optimized_virtual(bool f) { _optimized_virtual = f; }
+ bool is_optimized_virtual() const { return _optimized_virtual; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallStaticJavaNode-----------------------------
+// Make a direct subroutine call using Java calling convention (for static
+// calls and optimized virtual calls, plus calls to wrappers for run-time
+// routines); generates static stub.
+class CallStaticJavaNode : public CallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallStaticJavaNode(const TypeFunc* tf, address addr, ciMethod* method, int bci)
+ : CallJavaNode(tf, addr, method, bci), _name(NULL) {
+ init_class_id(Class_CallStaticJava);
+ }
+ CallStaticJavaNode(const TypeFunc* tf, address addr, const char* name, int bci,
+ const TypePtr* adr_type)
+ : CallJavaNode(tf, addr, NULL, bci), _name(name) {
+ init_class_id(Class_CallStaticJava);
+ // This node calls a runtime stub, which often has narrow memory effects.
+ _adr_type = adr_type;
+ }
+ const char *_name; // Runtime wrapper name
+
+ // If this is an uncommon trap, return the request code, else zero.
+ int uncommon_trap_request() const;
+ static int extract_uncommon_trap_request(const Node* call);
+
+ virtual int Opcode() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallDynamicJavaNode----------------------------
+// Make a dispatched call using Java calling convention.
+class CallDynamicJavaNode : public CallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallDynamicJavaNode( const TypeFunc *tf , address addr, ciMethod* method, int vtable_index, int bci ) : CallJavaNode(tf,addr,method,bci), _vtable_index(vtable_index) {
+ init_class_id(Class_CallDynamicJava);
+ }
+
+ int _vtable_index;
+ virtual int Opcode() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallRuntimeNode--------------------------------
+// Make a direct subroutine call node into compiled C++ code.
+class CallRuntimeNode : public CallNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ CallRuntimeNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallNode(tf, addr, adr_type),
+ _name(name)
+ {
+ init_class_id(Class_CallRuntime);
+ }
+
+ const char *_name; // Printable name, if _method is NULL
+ virtual int Opcode() const;
+ virtual void calling_convention( BasicType* sig_bt, VMRegPair *parm_regs, uint argcnt ) const;
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallLeafNode-----------------------------------
+// Make a direct subroutine call node into compiled C++ code, without
+// safepoints
+class CallLeafNode : public CallRuntimeNode {
+public:
+ CallLeafNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallRuntimeNode(tf, addr, name, adr_type)
+ {
+ init_class_id(Class_CallLeaf);
+ }
+ virtual int Opcode() const;
+ virtual bool guaranteed_safepoint() { return false; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CallLeafNoFPNode-------------------------------
+// CallLeafNode, not using floating point or using it in the same manner as
+// the generated code
+class CallLeafNoFPNode : public CallLeafNode {
+public:
+ CallLeafNoFPNode(const TypeFunc* tf, address addr, const char* name,
+ const TypePtr* adr_type)
+ : CallLeafNode(tf, addr, name, adr_type)
+ {
+ }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------Allocate---------------------------------------
+// High-level memory allocation
+//
+// AllocateNode and AllocateArrayNode are subclasses of CallNode because they will
+// get expanded into a code sequence containing a call. Unlike other CallNodes,
+// they have 2 memory projections and 2 i_o projections (which are distinguished by
+// the _is_io_use flag in the projection.) This is needed when expanding the node in
+// order to differentiate the uses of the projection on the normal control path from
+// those on the exception return path.
+//
+class AllocateNode : public CallNode {
+public:
+ enum {
+ // Output:
+ RawAddress = TypeFunc::Parms, // the newly-allocated raw address
+ // Inputs:
+ AllocSize = TypeFunc::Parms, // size (in bytes) of the new object
+ KlassNode, // type (maybe dynamic) of the obj.
+ InitialTest, // slow-path test (may be constant)
+ ALength, // array length (or TOP if none)
+ ParmLimit
+ };
+
+ static const TypeFunc* alloc_type() {
+ const Type** fields = TypeTuple::fields(ParmLimit - TypeFunc::Parms);
+ fields[AllocSize] = TypeInt::POS;
+ fields[KlassNode] = TypeInstPtr::NOTNULL;
+ fields[InitialTest] = TypeInt::BOOL;
+ fields[ALength] = TypeInt::INT; // length (can be a bad length)
+
+ const TypeTuple *domain = TypeTuple::make(ParmLimit, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+ }
+
+ virtual uint size_of() const; // Size is bigger
+ AllocateNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
+ Node *size, Node *klass_node, Node *initial_test);
+ // Expansion modifies the JVMState, so we need to clone it
+ virtual void clone_jvms() {
+ set_jvms(jvms()->clone_deep(Compile::current()));
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual bool guaranteed_safepoint() { return false; }
+
+ // Pattern-match a possible usage of AllocateNode.
+ // Return null if no allocation is recognized.
+ // The operand is the pointer produced by the (possible) allocation.
+ // It must be a projection of the Allocate or its subsequent CastPP.
+ // (Note: This function is defined in file graphKit.cpp, near
+ // GraphKit::new_instance/new_array, whose output it recognizes.)
+ // The 'ptr' may not have an offset unless the 'offset' argument is given.
+ static AllocateNode* Ideal_allocation(Node* ptr, PhaseTransform* phase);
+
+ // Fancy version which uses AddPNode::Ideal_base_and_offset to strip
+ // an offset, which is reported back to the caller.
+ // (Note: AllocateNode::Ideal_allocation is defined in graphKit.cpp.)
+ static AllocateNode* Ideal_allocation(Node* ptr, PhaseTransform* phase,
+ intptr_t& offset);
+
+ // Dig the klass operand out of a (possible) allocation site.
+ static Node* Ideal_klass(Node* ptr, PhaseTransform* phase) {
+ AllocateNode* allo = Ideal_allocation(ptr, phase);
+ return (allo == NULL) ? NULL : allo->in(KlassNode);
+ }
+
+ // Conservatively small estimate of offset of first non-header byte.
+ int minimum_header_size() {
+ return is_AllocateArray() ? sizeof(arrayOopDesc) : sizeof(oopDesc);
+ }
+
+ // Return the corresponding initialization barrier (or null if none).
+ // Walks out edges to find it...
+ // (Note: Both InitializeNode::allocation and AllocateNode::initialization
+ // are defined in graphKit.cpp, which sets up the bidirectional relation.)
+ InitializeNode* initialization();
+
+ // Convenience for initialization->maybe_set_complete(phase)
+ bool maybe_set_complete(PhaseGVN* phase);
+};
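+
+// A minimal pattern-match sketch (illustrative; 'ptr' and 'phase' are
+// placeholders) of recovering an allocation site from a pointer during a
+// transform, using the accessors above:
+//
+//   AllocateNode* alloc = AllocateNode::Ideal_allocation(ptr, phase);
+//   if (alloc != NULL) {
+//     Node* klass  = alloc->in(AllocateNode::KlassNode);  // == Ideal_klass(ptr, phase)
+//     Node* length = alloc->in(AllocateNode::ALength);    // TOP unless an array
+//   }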
+
+//------------------------------AllocateArray---------------------------------
+//
+// High-level array allocation
+//
+class AllocateArrayNode : public AllocateNode {
+public:
+ AllocateArrayNode(Compile* C, const TypeFunc *atype, Node *ctrl, Node *mem, Node *abio,
+ Node* size, Node* klass_node, Node* initial_test,
+ Node* count_val
+ )
+ : AllocateNode(C, atype, ctrl, mem, abio, size, klass_node,
+ initial_test)
+ {
+ init_class_id(Class_AllocateArray);
+ set_req(AllocateNode::ALength, count_val);
+ }
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+
+ // Pattern-match a possible usage of AllocateArrayNode.
+ // Return null if no allocation is recognized.
+ static AllocateArrayNode* Ideal_array_allocation(Node* ptr, PhaseTransform* phase) {
+ AllocateNode* allo = Ideal_allocation(ptr, phase);
+ return (allo == NULL || !allo->is_AllocateArray())
+ ? NULL : allo->as_AllocateArray();
+ }
+
+ // Dig the length operand out of a (possible) array allocation site.
+ static Node* Ideal_length(Node* ptr, PhaseTransform* phase) {
+ AllocateArrayNode* allo = Ideal_array_allocation(ptr, phase);
+ return (allo == NULL) ? NULL : allo->in(AllocateNode::ALength);
+ }
+};
+
+//------------------------------AbstractLockNode-----------------------------------
+class AbstractLockNode: public CallNode {
+private:
+ bool _eliminate; // indicates this lock can be safely eliminated
+#ifndef PRODUCT
+ NamedCounter* _counter;
+#endif
+
+protected:
+ // helper functions for lock elimination
+ //
+
+ bool find_matching_unlock(const Node* ctrl, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ bool find_lock_and_unlock_through_if(Node* node, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ bool find_unlocks_for_region(const RegionNode* region, LockNode* lock,
+ GrowableArray<AbstractLockNode*> &lock_ops);
+ LockNode *find_matching_lock(UnlockNode* unlock);
+
+
+public:
+ AbstractLockNode(const TypeFunc *tf)
+ : CallNode(tf, NULL, TypeRawPtr::BOTTOM),
+ _eliminate(false)
+ {
+#ifndef PRODUCT
+ _counter = NULL;
+#endif
+ }
+ virtual int Opcode() const = 0;
+ Node * obj_node() const {return in(TypeFunc::Parms + 0); }
+ Node * box_node() const {return in(TypeFunc::Parms + 1); }
+ Node * fastlock_node() const {return in(TypeFunc::Parms + 2); }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+ virtual uint size_of() const { return sizeof(*this); }
+
+ bool is_eliminated() {return _eliminate; }
+ // mark node as eliminated and update the counter if there is one
+ void set_eliminated();
+
+#ifndef PRODUCT
+ void create_lock_counter(JVMState* s);
+ NamedCounter* counter() const { return _counter; }
+#endif
+};
+
+//------------------------------Lock---------------------------------------
+// High-level lock operation
+//
+// This is a subclass of CallNode because it is a macro node which gets expanded
+// into a code sequence containing a call. This node takes 3 "parameters":
+// 0 - object to lock
+// 1 - a BoxLockNode
+// 2 - a FastLockNode
+//
+class LockNode : public AbstractLockNode {
+public:
+
+ static const TypeFunc *lock_type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(3);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ fields[TypeFunc::Parms+2] = TypeInt::BOOL; // FastLock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+3,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+ }
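+
+  // For reference, the three domain fields above line up with the
+  // AbstractLockNode accessors:
+  //
+  //   in(TypeFunc::Parms + 0) == obj_node()       // object being locked
+  //   in(TypeFunc::Parms + 1) == box_node()       // BoxLock stack slot
+  //   in(TypeFunc::Parms + 2) == fastlock_node()  // FastLock result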
+
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+ LockNode(Compile* C, const TypeFunc *tf) : AbstractLockNode( tf ) {
+ init_class_id(Class_Lock);
+ init_flags(Flag_is_macro);
+ C->add_macro_node(this);
+ }
+ virtual bool guaranteed_safepoint() { return false; }
+
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ // Expansion modifies the JVMState, so we need to clone it
+ virtual void clone_jvms() {
+ set_jvms(jvms()->clone_deep(Compile::current()));
+ }
+};
+
+//------------------------------Unlock---------------------------------------
+// High-level unlock operation
+class UnlockNode : public AbstractLockNode {
+public:
+ virtual int Opcode() const;
+ virtual uint size_of() const; // Size is bigger
+ UnlockNode(Compile* C, const TypeFunc *tf) : AbstractLockNode( tf ) {
+ init_class_id(Class_Unlock);
+ init_flags(Flag_is_macro);
+ C->add_macro_node(this);
+ }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ // unlock is never a safepoint
+ virtual bool guaranteed_safepoint() { return false; }
+};
diff --git a/src/share/vm/opto/cfgnode.cpp b/src/share/vm/opto/cfgnode.cpp
new file mode 100644
index 000000000..1c91c6be8
--- /dev/null
+++ b/src/share/vm/opto/cfgnode.cpp
@@ -0,0 +1,1954 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_cfgnode.cpp.incl"
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute the type of the RegionNode.
+const Type *RegionNode::Value( PhaseTransform *phase ) const {
+ for( uint i=1; i<req(); ++i ) { // For all paths in
+ Node *n = in(i); // Get Control source
+ if( !n ) continue; // Missing inputs are TOP
+ if( phase->type(n) == Type::CONTROL )
+ return Type::CONTROL;
+ }
+ return Type::TOP; // All paths dead? Then so are we
+}
+
+//------------------------------Identity---------------------------------------
+// Check for Region being Identity.
+Node *RegionNode::Identity( PhaseTransform *phase ) {
+ // Cannot have Region be an identity, even if it has only 1 input.
+ // Phi users cannot have their Region input folded away for them,
+ // since they need to select the proper data input
+ return this;
+}
+
+//------------------------------merge_region-----------------------------------
+// If a Region flows into a Region, merge into one big happy merge. This is
+// hard to do if there is stuff that has to happen
+static Node *merge_region(RegionNode *region, PhaseGVN *phase) {
+ if( region->Opcode() != Op_Region ) // Do not do to LoopNodes
+ return NULL;
+ Node *progress = NULL; // Progress flag
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+
+ uint rreq = region->req();
+ for( uint i = 1; i < rreq; i++ ) {
+ Node *r = region->in(i);
+ if( r && r->Opcode() == Op_Region && // Found a region?
+ r->in(0) == r && // Not already collapsed?
+ r != region && // Avoid stupid situations
+ r->outcnt() == 2 ) { // Self user and 'region' user only?
+ assert(!r->as_Region()->has_phi(), "no phi users");
+ if( !progress ) { // No progress
+ if (region->has_phi()) {
+ return NULL; // Only flatten if no Phi users
+ // igvn->hash_delete( phi );
+ }
+ igvn->hash_delete( region );
+ progress = region; // Making progress
+ }
+ igvn->hash_delete( r );
+
+ // Append inputs to 'r' onto 'region'
+ for( uint j = 1; j < r->req(); j++ ) {
+ // Move an input from 'r' to 'region'
+ region->add_req(r->in(j));
+ r->set_req(j, phase->C->top());
+ // Update phis of 'region'
+ //for( uint k = 0; k < max; k++ ) {
+ // Node *phi = region->out(k);
+ // if( phi->is_Phi() ) {
+ // phi->add_req(phi->in(i));
+ // }
+ //}
+
+ rreq++; // One more input to Region
+ } // Found a region to merge into Region
+ // Clobber pointer to the now dead 'r'
+ region->set_req(i, phase->C->top());
+ }
+ }
+
+ return progress;
+}
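+
+// Net effect sketch (illustrative; Region1, Region2, a, b, c are
+// placeholders): given
+//
+//   Region2 = Region(Region1(a, b), c)
+//
+// where Region1's only uses are its self edge and Region2, and no Phis hang
+// off either region, the inputs a and b are appended to Region2 and the
+// Region1 slot is clobbered to top, yielding Region2 = Region(top, c, a, b).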
+
+
+
+//--------------------------------has_phi--------------------------------------
+// Helper function: Return any PhiNode that uses this region or NULL
+PhiNode* RegionNode::has_phi() const {
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node* phi = fast_out(i);
+ if (phi->is_Phi()) { // Check for Phi users
+ assert(phi->in(0) == (Node*)this, "phi uses region only via in(0)");
+ return phi->as_Phi(); // this one is good enough
+ }
+ }
+
+ return NULL;
+}
+
+
+//-----------------------------has_unique_phi----------------------------------
+// Helper function: Return the only PhiNode that uses this region or NULL
+PhiNode* RegionNode::has_unique_phi() const {
+ // Check that only one use is a Phi
+ PhiNode* only_phi = NULL;
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node* phi = fast_out(i);
+ if (phi->is_Phi()) { // Check for Phi users
+ assert(phi->in(0) == (Node*)this, "phi uses region only via in(0)");
+ if (only_phi == NULL) {
+ only_phi = phi->as_Phi();
+ } else {
+ return NULL; // multiple phis
+ }
+ }
+ }
+
+ return only_phi;
+}
+
+
+//------------------------------check_phi_clipping-----------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Check inputs to the Phi
+static bool check_phi_clipping( PhiNode *phi, ConNode * &min, uint &min_idx, ConNode * &max, uint &max_idx, Node * &val, uint &val_idx ) {
+ min = NULL;
+ max = NULL;
+ val = NULL;
+ min_idx = 0;
+ max_idx = 0;
+ val_idx = 0;
+ uint phi_max = phi->req();
+ if( phi_max == 4 ) {
+ for( uint j = 1; j < phi_max; ++j ) {
+ Node *n = phi->in(j);
+ int opcode = n->Opcode();
+ switch( opcode ) {
+ case Op_ConI:
+ {
+ if( min == NULL ) {
+ min = n->Opcode() == Op_ConI ? (ConNode*)n : NULL;
+ min_idx = j;
+ } else {
+ max = n->Opcode() == Op_ConI ? (ConNode*)n : NULL;
+ max_idx = j;
+ if( min->get_int() > max->get_int() ) {
+ // Swap min and max
+ ConNode *temp;
+ uint temp_idx;
+ temp = min; min = max; max = temp;
+ temp_idx = min_idx; min_idx = max_idx; max_idx = temp_idx;
+ }
+ }
+ }
+ break;
+ default:
+ {
+ val = n;
+ val_idx = j;
+ }
+ break;
+ }
+ }
+ }
+ return ( min && max && val && (min->get_int() <= 0) && (max->get_int() >=0) );
+}
+
+
+//------------------------------check_if_clipping------------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Check that inputs to Region come from two IfNodes,
+//
+// If
+// False True
+// If |
+// False True |
+// | | |
+// RegionNode_inputs
+//
+static bool check_if_clipping( const RegionNode *region, IfNode * &bot_if, IfNode * &top_if ) {
+ top_if = NULL;
+ bot_if = NULL;
+
+ // Check control structure above RegionNode for (if ( if ) )
+ Node *in1 = region->in(1);
+ Node *in2 = region->in(2);
+ Node *in3 = region->in(3);
+ // Check that all inputs are projections
+ if( in1->is_Proj() && in2->is_Proj() && in3->is_Proj() ) {
+ Node *in10 = in1->in(0);
+ Node *in20 = in2->in(0);
+ Node *in30 = in3->in(0);
+ // Check that #1 and #2 are ifTrue and ifFalse from same If
+ if( in10 != NULL && in10->is_If() &&
+ in20 != NULL && in20->is_If() &&
+ in30 != NULL && in30->is_If() && in10 == in20 &&
+ (in1->Opcode() != in2->Opcode()) ) {
+ Node *in100 = in10->in(0);
+ Node *in1000 = (in100 != NULL && in100->is_Proj()) ? in100->in(0) : NULL;
+ // Check that control for in10 comes from other branch of IF from in3
+ if( in1000 != NULL && in1000->is_If() &&
+ in30 == in1000 && (in3->Opcode() != in100->Opcode()) ) {
+ // Control pattern checks
+ top_if = (IfNode*)in1000;
+ bot_if = (IfNode*)in10;
+ }
+ }
+ }
+
+ return (top_if != NULL);
+}
+
+
+//------------------------------check_convf2i_clipping-------------------------
+// Helper function for RegionNode's identification of FP clipping
+// Verify that the value input to the phi comes from "ConvF2I; LShift; RShift"
+static bool check_convf2i_clipping( PhiNode *phi, uint idx, ConvF2INode * &convf2i, Node *min, Node *max) {
+ convf2i = NULL;
+
+ // Check for the RShiftNode
+ Node *rshift = phi->in(idx);
+ assert( rshift, "Previous checks ensure phi input is present");
+ if( rshift->Opcode() != Op_RShiftI ) { return false; }
+
+ // Check for the LShiftNode
+ Node *lshift = rshift->in(1);
+ assert( lshift, "Previous checks ensure phi input is present");
+ if( lshift->Opcode() != Op_LShiftI ) { return false; }
+
+ // Check for the ConvF2INode
+ Node *conv = lshift->in(1);
+ if( conv->Opcode() != Op_ConvF2I ) { return false; }
+
+ // Check that shift amounts are only to get sign bits set after F2I
+ jint max_cutoff = max->get_int();
+ jint min_cutoff = min->get_int();
+ jint left_shift = lshift->in(2)->get_int();
+ jint right_shift = rshift->in(2)->get_int();
+ jint max_post_shift = nth_bit(BitsPerJavaInteger - left_shift - 1);
+ if( left_shift != right_shift ||
+ 0 > left_shift || left_shift >= BitsPerJavaInteger ||
+ max_post_shift < max_cutoff ||
+ max_post_shift < -min_cutoff ) {
+ // Shifts are necessary but current transformation eliminates them
+ return false;
+ }
+
+ // OK to return the result of ConvF2I without shifting
+ convf2i = (ConvF2INode*)conv;
+ return true;
+}
+
+
+//------------------------------check_compare_clipping-------------------------
+// Helper function for RegionNode's identification of FP clipping
+static bool check_compare_clipping( bool less_than, IfNode *iff, ConNode *limit, Node * & input ) {
+ Node *i1 = iff->in(1);
+ if ( !i1->is_Bool() ) { return false; }
+ BoolNode *bool1 = i1->as_Bool();
+ if( less_than && bool1->_test._test != BoolTest::le ) { return false; }
+ else if( !less_than && bool1->_test._test != BoolTest::lt ) { return false; }
+ const Node *cmpF = bool1->in(1);
+ if( cmpF->Opcode() != Op_CmpF ) { return false; }
+ // Test that the float value being compared against
+ // is equivalent to the int value used as a limit
+ Node *nodef = cmpF->in(2);
+ if( nodef->Opcode() != Op_ConF ) { return false; }
+ jfloat conf = nodef->getf();
+ jint coni = limit->get_int();
+ if( ((int)conf) != coni ) { return false; }
+ input = cmpF->in(1);
+ return true;
+}
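+
+// A rough sketch (names and bounds are purely illustrative) of the float-clipping
+// idiom that the helpers above recognize together: a value converted with
+// ConvF2I, sign-truncated with a matching LShiftI/RShiftI pair, and clamped by
+// two compares against constants, all merging in a 3-input Region/Phi.
+// Something like:
+//
+//   int clip_to_byte(float f) {
+//     int i = ((int)f << 24) >> 24;   // ConvF2I; LShiftI; RShiftI
+//     if (f > 127.0f)   i = 127;      // max constant into the Phi
+//     if (f <= -128.0f) i = -128;     // min constant into the Phi
+//     return i;                       // Phi(min, max, shifted value)
+//   }
+//
+// RegionNode::Ideal (below) rewrites this to integer compares against the
+// ConvF2I result and drops the now-redundant shift pair.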
+
+//------------------------------is_unreachable_region--------------------------
+// Find if the Region node is reachable from the root.
+bool RegionNode::is_unreachable_region(PhaseGVN *phase) const {
+ assert(req() == 2, "");
+
+ // First, cut the simple case of fallthrough region when NONE of
+ // region's phis references itself directly or through a data node.
+ uint max = outcnt();
+ uint i;
+ for (i = 0; i < max; i++) {
+ Node* phi = raw_out(i);
+ if (phi != NULL && phi->is_Phi()) {
+ assert(phase->eqv(phi->in(0), this) && phi->req() == 2, "");
+ if (phi->outcnt() == 0)
+ continue; // Safe case - no loops
+ if (phi->outcnt() == 1) {
+ Node* u = phi->raw_out(0);
+ // Skip if the only use is another Phi, Call, or Uncommon trap.
+ // It is safe to consider this case as fallthrough.
+ if (u != NULL && (u->is_Phi() || u->is_CFG()))
+ continue;
+ }
+ // Check whether the phi references itself directly or through another node.
+ if (phi->as_Phi()->simple_data_loop_check(phi->in(1)) >= PhiNode::Unsafe)
+ break; // Found possible unsafe data loop.
+ }
+ }
+ if (i >= max)
+ return false; // An unsafe case was NOT found - don't need graph walk.
+
+ // Unsafe case - check if the Region node is reachable from root.
+ ResourceMark rm;
+
+ Arena *a = Thread::current()->resource_area();
+ Node_List nstack(a);
+ VectorSet visited(a);
+
+ // Mark all control nodes reachable from root outputs
+ Node *n = (Node*)phase->C->root();
+ nstack.push(n);
+ visited.set(n->_idx);
+ while (nstack.size() != 0) {
+ n = nstack.pop();
+ uint max = n->outcnt();
+ for (uint i = 0; i < max; i++) {
+ Node* m = n->raw_out(i);
+ if (m != NULL && m->is_CFG()) {
+ if (phase->eqv(m, this)) {
+ return false; // We reached the Region node - it is not dead.
+ }
+ if (!visited.test_set(m->_idx))
+ nstack.push(m);
+ }
+ }
+ }
+
+ return true; // The Region node is unreachable - it is dead.
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Must preserve
+// the CFG, but we can still strip out dead paths.
+Node *RegionNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( !can_reshape && !in(0) ) return NULL; // Already degraded to a Copy
+ assert(!in(0) || !in(0)->is_Root(), "not a specially hidden merge");
+
+ // Check for RegionNode with no Phi users and both inputs come from either
+ // arm of the same IF. If found, then the control-flow split is useless.
+ bool has_phis = false;
+ if (can_reshape) { // Need DU info to check for Phi users
+ has_phis = (has_phi() != NULL); // Cache result
+ if (!has_phis) { // No Phi users? Nothing merging?
+ for (uint i = 1; i < req()-1; i++) {
+ Node *if1 = in(i);
+ if( !if1 ) continue;
+ Node *iff = if1->in(0);
+ if( !iff || !iff->is_If() ) continue;
+ for( uint j=i+1; j<req(); j++ ) {
+ if( in(j) && in(j)->in(0) == iff &&
+ if1->Opcode() != in(j)->Opcode() ) {
+ // Add the IF Projections to the worklist. They (and the IF itself)
+ // will be eliminated if dead.
+ phase->is_IterGVN()->add_users_to_worklist(iff);
+ set_req(i, iff->in(0));// Skip around the useless IF diamond
+ set_req(j, NULL);
+ return this; // Record progress
+ }
+ }
+ }
+ }
+ }
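+
+ // A rough sketch of the shape collapsed above (the source form is only
+ // illustrative): an empty diamond where both Region inputs are opposite
+ // projections of one If and nothing is merged, e.g. "if (cond) { } else { }":
+ //
+ //            If
+ //           /  \
+ //      IfTrue  IfFalse
+ //           \  /
+ //          Region        (no Phi users)
+ //
+ // Input i is rewired to If->in(0), input j is cleared, and the useless If
+ // and its projections are left to die on the IGVN worklist.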
+
+ // Remove TOP or NULL input paths. If only 1 input path remains, this Region
+ // degrades to a copy.
+ bool add_to_worklist = false;
+ int cnt = 0; // Count of values merging
+ DEBUG_ONLY( int cnt_orig = req(); ) // Save original inputs count
+ int del_it = 0; // The last input path we delete
+ // For all inputs...
+ for( uint i=1; i<req(); ++i ){// For all paths in
+ Node *n = in(i); // Get the input
+ if( n != NULL ) {
+ // Remove useless control copy inputs
+ if( n->is_Region() && n->as_Region()->is_copy() ) {
+ set_req(i, n->nonnull_req());
+ i--;
+ continue;
+ }
+ if( n->is_Proj() ) { // Remove useless rethrows
+ Node *call = n->in(0);
+ if (call->is_Call() && call->as_Call()->entry_point() == OptoRuntime::rethrow_stub()) {
+ set_req(i, call->in(0));
+ i--;
+ continue;
+ }
+ }
+ if( phase->type(n) == Type::TOP ) {
+ set_req(i, NULL); // Ignore TOP inputs
+ i--;
+ continue;
+ }
+ cnt++; // One more value merging
+
+ } else if (can_reshape) { // Else found dead path with DU info
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ del_req(i); // Yank path from self
+ del_it = i;
+ uint max = outcnt();
+ DUIterator j;
+ bool progress = true;
+ while(progress) { // Need to establish property over all users
+ progress = false;
+ for (j = outs(); has_out(j); j++) {
+ Node *n = out(j);
+ if( n->req() != req() && n->is_Phi() ) {
+ assert( n->in(0) == this, "" );
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ n->set_req_X(i,NULL,igvn);// Correct DU info
+ n->del_req(i); // Yank path from Phis
+ if( max != outcnt() ) {
+ progress = true;
+ j = refresh_out_pos(j);
+ max = outcnt();
+ }
+ }
+ }
+ }
+ add_to_worklist = true;
+ i--;
+ }
+ }
+
+ if (can_reshape && cnt == 1) {
+ // Is it a dead loop?
+ // If it is a LoopNode it had 2 inputs (+1 for itself) and
+ // one of them was cut. The loop is dead if the cut one was EntryControl.
+ assert(!this->is_Loop() || cnt_orig == 3, "Loop node should have 3 inputs");
+ if ((this->is_Loop() && del_it == LoopNode::EntryControl) ||
+ (!this->is_Loop() && has_phis && is_unreachable_region(phase))) {
+ // Yes, the region will be removed during the next step below.
+ // Cut the backedge input and remove phis since no data paths left.
+ // We don't cut outputs to other nodes here since we need to put them
+ // on the worklist.
+ del_req(1);
+ cnt = 0;
+ assert( req() == 1, "no more inputs expected" );
+ uint max = outcnt();
+ bool progress = true;
+ Node *top = phase->C->top();
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ DUIterator j;
+ while(progress) {
+ progress = false;
+ for (j = outs(); has_out(j); j++) {
+ Node *n = out(j);
+ if( n->is_Phi() ) {
+ assert( igvn->eqv(n->in(0), this), "" );
+ assert( n->req() == 2 && n->in(1) != NULL, "Only one data input expected" );
+ // Break dead loop data path.
+ // Eagerly replace phis with top to avoid generating phi copies.
+ igvn->add_users_to_worklist(n);
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ igvn->subsume_node(n, top);
+ if( max != outcnt() ) {
+ progress = true;
+ j = refresh_out_pos(j);
+ max = outcnt();
+ }
+ }
+ }
+ }
+ add_to_worklist = true;
+ }
+ }
+ if (add_to_worklist) {
+ phase->is_IterGVN()->add_users_to_worklist(this); // Revisit collapsed Phis
+ }
+
+ if( cnt <= 1 ) { // Only 1 path in?
+ set_req(0, NULL); // Null control input for region copy
+ if( cnt == 0 && !can_reshape) { // Parse phase - leave the node as it is.
+ // No inputs or all inputs are NULL.
+ return NULL;
+ } else if (can_reshape) { // Optimization phase - remove the node
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node *parent_ctrl;
+ if( cnt == 0 ) {
+ assert( req() == 1, "no inputs expected" );
+ // During the IGVN phase such a region will be subsumed by the TOP node,
+ // so the region's phis would get TOP as their control node.
+ // Kill the phis here to avoid that; PhiNode::is_copy() will always be false.
+ // Also set other user's input to top.
+ parent_ctrl = phase->C->top();
+ } else {
+ // The fallthrough case since we already checked dead loops above.
+ parent_ctrl = in(1);
+ assert(parent_ctrl != NULL, "Region is a copy of some non-null control");
+ assert(!igvn->eqv(parent_ctrl, this), "Close dead loop");
+ }
+ if (!add_to_worklist)
+ igvn->add_users_to_worklist(this); // Check for further allowed opts
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+ Node* n = last_out(i);
+ igvn->hash_delete(n); // Remove from worklist before modifying edges
+ if( n->is_Phi() ) { // Collapse all Phis
+ // Eagerly replace phis to avoid generating copies.
+ igvn->add_users_to_worklist(n);
+ igvn->hash_delete(n); // Yank from hash before hacking edges
+ if( cnt == 0 ) {
+ assert( n->req() == 1, "No data inputs expected" );
+ igvn->subsume_node(n, parent_ctrl); // replaced by top
+ } else {
+ assert( n->req() == 2 && n->in(1) != NULL, "Only one data input expected" );
+ Node* in1 = n->in(1); // replaced by unique input
+ if( n->as_Phi()->is_unsafe_data_reference(in1) )
+ in1 = phase->C->top(); // replaced by top
+ igvn->subsume_node(n, in1);
+ }
+ }
+ else if( n->is_Region() ) { // Update all incoming edges
+ assert( !igvn->eqv(n, this), "Must be removed from DefUse edges");
+ uint uses_found = 0;
+ for( uint k=1; k < n->req(); k++ ) {
+ if( n->in(k) == this ) {
+ n->set_req(k, parent_ctrl);
+ uses_found++;
+ }
+ }
+ if( uses_found > 1 ) { // (--i) done at the end of the loop.
+ i -= (uses_found - 1);
+ }
+ }
+ else {
+ assert( igvn->eqv(n->in(0), this), "Expect RegionNode to be control parent");
+ n->set_req(0, parent_ctrl);
+ }
+#ifdef ASSERT
+ for( uint k=0; k < n->req(); k++ ) {
+ assert( !igvn->eqv(n->in(k), this), "All uses of RegionNode should be gone");
+ }
+#endif
+ }
+ // Remove the RegionNode itself from DefUse info
+ igvn->remove_dead_node(this);
+ return NULL;
+ }
+ return this; // Record progress
+ }
+
+
+ // If a Region flows into a Region, merge into one big happy merge.
+ if (can_reshape) {
+ Node *m = merge_region(this, phase);
+ if (m != NULL) return m;
+ }
+
+ // Check if this region is the root of a clipping idiom on floats
+ if( ConvertFloat2IntClipping && can_reshape && req() == 4 ) {
+ // Check that only one use is a Phi and that its inputs simplify to two constants plus a value
+ PhiNode* phi = has_unique_phi();
+ if (phi != NULL) { // One Phi user
+ // Check inputs to the Phi
+ ConNode *min;
+ ConNode *max;
+ Node *val;
+ uint min_idx;
+ uint max_idx;
+ uint val_idx;
+ if( check_phi_clipping( phi, min, min_idx, max, max_idx, val, val_idx ) ) {
+ IfNode *top_if;
+ IfNode *bot_if;
+ if( check_if_clipping( this, bot_if, top_if ) ) {
+ // Control pattern checks, now verify compares
+ Node *top_in = NULL; // value being compared against
+ Node *bot_in = NULL;
+ if( check_compare_clipping( true, bot_if, min, bot_in ) &&
+ check_compare_clipping( false, top_if, max, top_in ) ) {
+ if( bot_in == top_in ) {
+ PhaseIterGVN *gvn = phase->is_IterGVN();
+ assert( gvn != NULL, "Only had DefUse info in IterGVN");
+ // Only remaining check is that bot_in == top_in == (Phi's val + mods)
+
+ // Check for the ConvF2INode
+ ConvF2INode *convf2i;
+ if( check_convf2i_clipping( phi, val_idx, convf2i, min, max ) &&
+ convf2i->in(1) == bot_in ) {
+ // Matched pattern, including LShiftI; RShiftI, replace with integer compares
+ // max test
+ Node *cmp = gvn->register_new_node_with_optimizer(new (phase->C, 3) CmpINode( convf2i, min ));
+ Node *boo = gvn->register_new_node_with_optimizer(new (phase->C, 2) BoolNode( cmp, BoolTest::lt ));
+ IfNode *iff = (IfNode*)gvn->register_new_node_with_optimizer(new (phase->C, 2) IfNode( top_if->in(0), boo, PROB_UNLIKELY_MAG(5), top_if->_fcnt ));
+ Node *if_min= gvn->register_new_node_with_optimizer(new (phase->C, 1) IfTrueNode (iff));
+ Node *ifF = gvn->register_new_node_with_optimizer(new (phase->C, 1) IfFalseNode(iff));
+ // min test
+ cmp = gvn->register_new_node_with_optimizer(new (phase->C, 3) CmpINode( convf2i, max ));
+ boo = gvn->register_new_node_with_optimizer(new (phase->C, 2) BoolNode( cmp, BoolTest::gt ));
+ iff = (IfNode*)gvn->register_new_node_with_optimizer(new (phase->C, 2) IfNode( ifF, boo, PROB_UNLIKELY_MAG(5), bot_if->_fcnt ));
+ Node *if_max= gvn->register_new_node_with_optimizer(new (phase->C, 1) IfTrueNode (iff));
+ ifF = gvn->register_new_node_with_optimizer(new (phase->C, 1) IfFalseNode(iff));
+ // update input edges to region node
+ set_req_X( min_idx, if_min, gvn );
+ set_req_X( max_idx, if_max, gvn );
+ set_req_X( val_idx, ifF, gvn );
+ // remove unnecessary 'LShiftI; RShiftI' idiom
+ gvn->hash_delete(phi);
+ phi->set_req_X( val_idx, convf2i, gvn );
+ gvn->hash_find_insert(phi);
+ // Return transformed region node
+ return this;
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ return NULL;
+}
+
+
+
+const RegMask &RegionNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+// Find the one non-null required input. RegionNode only
+Node *Node::nonnull_req() const {
+ assert( is_Region(), "" );
+ for( uint i = 1; i < _cnt; i++ )
+ if( in(i) )
+ return in(i);
+ ShouldNotReachHere();
+ return NULL;
+}
+
+
+//=============================================================================
+// note that these functions assume that the _adr_type field is flattened
+uint PhiNode::hash() const {
+ const Type* at = _adr_type;
+ return TypeNode::hash() + (at ? at->hash() : 0);
+}
+uint PhiNode::cmp( const Node &n ) const {
+ return TypeNode::cmp(n) && _adr_type == ((PhiNode&)n)._adr_type;
+}
+static inline
+const TypePtr* flatten_phi_adr_type(const TypePtr* at) {
+ if (at == NULL || at == TypePtr::BOTTOM) return at;
+ return Compile::current()->alias_type(at)->adr_type();
+}
+
+//----------------------------make---------------------------------------------
+// create a new phi with edges matching r and set (initially) to x
+PhiNode* PhiNode::make(Node* r, Node* x, const Type *t, const TypePtr* at) {
+ uint preds = r->req(); // Number of predecessor paths
+ assert(t != Type::MEMORY || at == flatten_phi_adr_type(at), "flatten at");
+ PhiNode* p = new (Compile::current(), preds) PhiNode(r, t, at);
+ for (uint j = 1; j < preds; j++) {
+ // Fill in all inputs, except those which the region does not yet have
+ if (r->in(j) != NULL)
+ p->init_req(j, x);
+ }
+ return p;
+}
+PhiNode* PhiNode::make(Node* r, Node* x) {
+ const Type* t = x->bottom_type();
+ const TypePtr* at = NULL;
+ if (t == Type::MEMORY) at = flatten_phi_adr_type(x->adr_type());
+ return make(r, x, t, at);
+}
+PhiNode* PhiNode::make_blank(Node* r, Node* x) {
+ const Type* t = x->bottom_type();
+ const TypePtr* at = NULL;
+ if (t == Type::MEMORY) at = flatten_phi_adr_type(x->adr_type());
+ return new (Compile::current(), r->req()) PhiNode(r, t, at);
+}
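+
+// A rough usage sketch (names are illustrative, not from this file): building
+// a phi that merges two values arriving at a 2-input region, assuming 'gvn'
+// is the current PhaseGVN and 'region', 'val_a', 'val_b' already exist:
+//
+//   PhiNode* phi = PhiNode::make(region, val_a);  // every live path starts as val_a
+//   phi->set_req(2, val_b);                       // path 2 merges val_b instead
+//   Node* merged = gvn->transform(phi);
+//
+// make_blank() is the variant that leaves the data inputs unset for callers
+// that fill them in one path at a time.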
+
+
+//------------------------slice_memory-----------------------------------------
+// create a new phi with narrowed memory type
+PhiNode* PhiNode::slice_memory(const TypePtr* adr_type) const {
+ PhiNode* mem = (PhiNode*) clone();
+ *(const TypePtr**)&mem->_adr_type = adr_type;
+ // convert self-loops, or else we get a bad graph
+ for (uint i = 1; i < req(); i++) {
+ if ((const Node*)in(i) == this) mem->set_req(i, mem);
+ }
+ mem->verify_adr_type();
+ return mem;
+}
+
+//------------------------verify_adr_type--------------------------------------
+#ifdef ASSERT
+void PhiNode::verify_adr_type(VectorSet& visited, const TypePtr* at) const {
+ if (visited.test_set(_idx)) return; //already visited
+
+ // recheck constructor invariants:
+ verify_adr_type(false);
+
+ // recheck local phi/phi consistency:
+ assert(_adr_type == at || _adr_type == TypePtr::BOTTOM,
+ "adr_type must be consistent across phi nest");
+
+ // walk around
+ for (uint i = 1; i < req(); i++) {
+ Node* n = in(i);
+ if (n == NULL) continue;
+ const Node* np = in(i);
+ if (np->is_Phi()) {
+ np->as_Phi()->verify_adr_type(visited, at);
+ } else if (n->bottom_type() == Type::TOP
+ || (n->is_Mem() && n->in(MemNode::Address)->bottom_type() == Type::TOP)) {
+ // ignore top inputs
+ } else {
+ const TypePtr* nat = flatten_phi_adr_type(n->adr_type());
+ // recheck phi/non-phi consistency at leaves:
+ assert((nat != NULL) == (at != NULL), "");
+ assert(nat == at || nat == TypePtr::BOTTOM,
+ "adr_type must be consistent at leaves of phi nest");
+ }
+ }
+}
+
+// Verify a whole nest of phis rooted at this one.
+void PhiNode::verify_adr_type(bool recursive) const {
+ if (is_error_reported()) return; // muzzle asserts when debugging an error
+ if (Node::in_dump()) return; // muzzle asserts when printing
+
+ assert((_type == Type::MEMORY) == (_adr_type != NULL), "adr_type for memory phis only");
+
+ if (!VerifyAliases) return; // verify thoroughly only if requested
+
+ assert(_adr_type == flatten_phi_adr_type(_adr_type),
+ "Phi::adr_type must be pre-normalized");
+
+ if (recursive) {
+ VectorSet visited(Thread::current()->resource_area());
+ verify_adr_type(visited, _adr_type);
+ }
+}
+#endif
+
+
+//------------------------------Value------------------------------------------
+// Compute the type of the PhiNode
+const Type *PhiNode::Value( PhaseTransform *phase ) const {
+ Node *r = in(0); // RegionNode
+ if( !r ) // Copy or dead
+ return in(1) ? phase->type(in(1)) : Type::TOP;
+
+ // Note: During parsing, phis are often transformed before their regions.
+ // This means we have to use type_or_null to defend against untyped regions.
+ if( phase->type_or_null(r) == Type::TOP ) // Dead code?
+ return Type::TOP;
+
+ // Check for trip-counted loop. If so, be smarter.
+ CountedLoopNode *l = r->is_CountedLoop() ? r->as_CountedLoop() : NULL;
+ if( l && l->can_be_counted_loop(phase) &&
+ ((const Node*)l->phi() == this) ) { // Trip counted loop!
+ // protect against init_trip() or limit() returning NULL
+ const Node *init = l->init_trip();
+ const Node *limit = l->limit();
+ if( init != NULL && limit != NULL && l->stride_is_con() ) {
+ const TypeInt *lo = init ->bottom_type()->isa_int();
+ const TypeInt *hi = limit->bottom_type()->isa_int();
+ if( lo && hi ) { // Dying loops might have TOP here
+ int stride = l->stride_con();
+ if( stride < 0 ) { // Down-counter loop
+ const TypeInt *tmp = lo; lo = hi; hi = tmp;
+ stride = -stride;
+ }
+ if( lo->_hi < hi->_lo ) // Reversed endpoints are well defined :-(
+ return TypeInt::make(lo->_lo,hi->_hi,3);
+ }
+ }
+ }
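+
+  // For example (values illustrative): a counted loop with init 0, limit 100
+  // and stride +1 gives its trip-count phi the type int:[0..100] here rather
+  // than the full int range, which can help later range-check elimination.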
+
+ // Until we have harmony between classes and interfaces in the type
+ // lattice, we must tread carefully around phis which implicitly
+ // convert the one to the other.
+ const TypeInstPtr* ttip = _type->isa_instptr();
+ bool is_intf = false;
+ if (ttip != NULL) {
+ ciKlass* k = ttip->klass();
+ if (k->is_loaded() && k->is_interface())
+ is_intf = true;
+ }
+
+ // Default case: merge all inputs
+ const Type *t = Type::TOP; // Merged type starting value
+ for (uint i = 1; i < req(); ++i) {// For all paths in
+ // Reachable control path?
+ if (r->in(i) && phase->type(r->in(i)) == Type::CONTROL) {
+ const Type* ti = phase->type(in(i));
+ // We assume that each input of an interface-valued Phi is a true
+ // subtype of that interface. This might not be true of the meet
+ // of all the input types. The lattice is not distributive in
+ // such cases. Ward off asserts in type.cpp by refusing to do
+ // meets between interfaces and proper classes.
+ const TypeInstPtr* tiip = ti->isa_instptr();
+ if (tiip) {
+ bool ti_is_intf = false;
+ ciKlass* k = tiip->klass();
+ if (k->is_loaded() && k->is_interface())
+ ti_is_intf = true;
+ if (is_intf != ti_is_intf)
+ { t = _type; break; }
+ }
+ t = t->meet(ti);
+ }
+ }
+
+ // The worst-case type (from ciTypeFlow) should be consistent with "t".
+ // That is, we expect that "t->higher_equal(_type)" holds true.
+ // There are various exceptions:
+ // - Inputs which are phis might in fact be widened unnecessarily.
+ // For example, an input might be a widened int while the phi is a short.
+ // - Inputs might be BotPtrs but this phi is dependent on a null check,
+ // and postCCP has removed the cast which encodes the result of the check.
+ // - The type of this phi is an interface, and the inputs are classes.
+ // - Value calls on inputs might produce fuzzy results.
+ // (Occurrences of this case suggest improvements to Value methods.)
+ //
+ // It is not possible to see Type::BOTTOM values as phi inputs,
+ // because the ciTypeFlow pre-pass produces verifier-quality types.
+ const Type* ft = t->filter(_type); // Worst case type
+
+#ifdef ASSERT
+ // The following logic has been moved into TypeOopPtr::filter.
+ const Type* jt = t->join(_type);
+ if( jt->empty() ) { // Emptied out???
+
+ // Check for evil case of 't' being a class and '_type' expecting an
+ // interface. This can happen because the bytecodes do not contain
+ // enough type info to distinguish a Java-level interface variable
+ // from a Java-level object variable. If we meet 2 classes which
+ // both implement interface I, but their meet is at 'j/l/O' which
+ // doesn't implement I, we have no way to tell if the result should
+ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows
+ // into a Phi which "knows" it's an Interface type we'll have to
+ // uplift the type.
+ if( !t->empty() && ttip && ttip->is_loaded() && ttip->klass()->is_interface() )
+ { assert(ft == _type, ""); } // Uplift to interface
+ // Otherwise it's something stupid like non-overlapping int ranges
+ // found on dying counted loops.
+ else
+ { assert(ft == Type::TOP, ""); } // Canonical empty value
+ }
+
+ else {
+
+ // If we have an interface-typed Phi and we narrow to a class type, the join
+ // should report back the class. However, if we have a J/L/Object
+ // class-typed Phi and an interface flows in, the meet & join can report an
+ // interface back out. That should not be possible, but it happens because
+ // the type system does not interact well with interfaces.
+ const TypeInstPtr *jtip = jt->isa_instptr();
+ if( jtip && ttip ) {
+ if( jtip->is_loaded() && jtip->klass()->is_interface() &&
+ ttip->is_loaded() && !ttip->klass()->is_interface() )
+ // Happens in a CTW of rt.jar, 320-341, no extra flags
+ { assert(ft == ttip->cast_to_ptr_type(jtip->ptr()), ""); jt = ft; }
+ }
+ if (jt != ft && jt->base() == ft->base()) {
+ if (jt->isa_int() &&
+ jt->is_int()->_lo == ft->is_int()->_lo &&
+ jt->is_int()->_hi == ft->is_int()->_hi)
+ jt = ft;
+ if (jt->isa_long() &&
+ jt->is_long()->_lo == ft->is_long()->_lo &&
+ jt->is_long()->_hi == ft->is_long()->_hi)
+ jt = ft;
+ }
+ if (jt != ft) {
+ tty->print("merge type: "); t->dump(); tty->cr();
+ tty->print("kill type: "); _type->dump(); tty->cr();
+ tty->print("join type: "); jt->dump(); tty->cr();
+ tty->print("filter type: "); ft->dump(); tty->cr();
+ }
+ assert(jt == ft, "");
+ }
+#endif //ASSERT
+
+ // Deal with conversion problems found in data loops.
+ ft = phase->saturate(ft, phase->type_or_null(this), _type);
+
+ return ft;
+}
+
+
+//------------------------------is_diamond_phi---------------------------------
+// Does this Phi represent a simple well-shaped diamond merge? Return the
+// index of the true path or 0 otherwise.
+int PhiNode::is_diamond_phi() const {
+ // Check for a 2-path merge
+ Node *region = in(0);
+ if( !region ) return 0;
+ if( region->req() != 3 ) return 0;
+ if( req() != 3 ) return 0;
+ // Check that both paths come from the same If
+ Node *ifp1 = region->in(1);
+ Node *ifp2 = region->in(2);
+ if( !ifp1 || !ifp2 ) return 0;
+ Node *iff = ifp1->in(0);
+ if( !iff || !iff->is_If() ) return 0;
+ if( iff != ifp2->in(0) ) return 0;
+ // Check for a proper bool/cmp
+ const Node *b = iff->in(1);
+ if( !b->is_Bool() ) return 0;
+ const Node *cmp = b->in(1);
+ if( !cmp->is_Cmp() ) return 0;
+
+ // Check for branching opposite expected
+ if( ifp2->Opcode() == Op_IfTrue ) {
+ assert( ifp1->Opcode() == Op_IfFalse, "" );
+ return 2;
+ } else {
+ assert( ifp1->Opcode() == Op_IfTrue, "" );
+ return 1;
+ }
+}
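+
+// A rough sketch of the diamond recognized above (names illustrative):
+// source like "x = (p < q) ? a : b;" parses to
+//
+//        If (Bool (CmpI p q))
+//          /        \
+//      IfTrue     IfFalse
+//          \        /
+//            Region
+//              |
+//       Phi(Region, a, b)
+//
+// and the returned value is the phi input index (1 or 2) fed by the IfTrue
+// path, or 0 when the shape does not match.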
+
+//----------------------------check_cmove_id-----------------------------------
+// Check for CMove'ing a constant after comparing against the constant.
+// Happens all the time now, since if we compare equality vs a constant in
+// the parser, we "know" the variable is constant on one path and we force
+// it. Thus code like "if( x==0 ) {/*EMPTY*/}" ends up inserting a
+// conditional move: "x = (x==0)?0:x;". Yucko. This fix is slightly more
+// general in that we don't need constants. Since CMove's are only inserted
+// in very special circumstances, we do it here on generic Phi's.
+Node* PhiNode::is_cmove_id(PhaseTransform* phase, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ Node* region = in(0);
+ Node* iff = region->in(1)->in(0);
+ BoolNode* b = iff->in(1)->as_Bool();
+ Node* cmp = b->in(1);
+ Node* tval = in(true_path);
+ Node* fval = in(3-true_path);
+ Node* id = CMoveNode::is_cmove_id(phase, cmp, tval, fval, b);
+ if (id == NULL)
+ return NULL;
+
+ // Either value might be a cast that depends on a branch of 'iff'.
+ // Since the 'id' value will float free of the diamond, either
+ // decast or return failure.
+ Node* ctl = id->in(0);
+ if (ctl != NULL && ctl->in(0) == iff) {
+ if (id->is_ConstraintCast()) {
+ return id->in(1);
+ } else {
+ // Don't know how to disentangle this value.
+ return NULL;
+ }
+ }
+
+ return id;
+}
+
+//------------------------------Identity---------------------------------------
+// Check for Region being Identity.
+Node *PhiNode::Identity( PhaseTransform *phase ) {
+ // Check for no merging going on
+ // (There used to be special-case code here when this->region->is_Loop.
+ // It would check for a tributary phi on the backedge that trivially
+ // duplicated the main phi, perhaps with a single cast. The unique_input method
+ // does all this and more, by reducing such tributaries to 'this'.)
+ Node* uin = unique_input(phase);
+ if (uin != NULL) {
+ return uin;
+ }
+
+ int true_path = is_diamond_phi();
+ if (true_path != 0) {
+ Node* id = is_cmove_id(phase, true_path);
+ if (id != NULL) return id;
+ }
+
+ return this; // No identity
+}
+
+//-----------------------------unique_input------------------------------------
+// Find the unique value, discounting top, self-loops, and casts.
+// Return top if there are no inputs, and self if there are multiple.
+Node* PhiNode::unique_input(PhaseTransform* phase) {
+ // 1) One unique direct input, or
+ // 2) some of the inputs have an intervening ConstraintCast and
+ // the type of input is the same or sharper (more specific)
+ // than the phi's type.
+ // 3) an input is a self loop
+ //
+ //  1) input   or   2) input     or   3) input __
+ //     /   \           /   \               \  /  \
+ //     \   /          |    cast             phi  cast
+ //      phi            \   /               /  \  /
+ //                      phi               /    --
+
+ Node* r = in(0); // RegionNode
+ if (r == NULL) return in(1); // Already degraded to a Copy
+ Node* uncasted_input = NULL; // The unique uncasted input (ConstraintCasts removed)
+ Node* direct_input = NULL; // The unique direct input
+
+ for (uint i = 1, cnt = req(); i < cnt; ++i) {
+ Node* rc = r->in(i);
+ if (rc == NULL || phase->type(rc) == Type::TOP)
+ continue; // ignore unreachable control path
+ Node* n = in(i);
+ Node* un = n->uncast();
+ if (un == NULL || un == this || phase->type(un) == Type::TOP) {
+ continue; // ignore if top, or in(i) and "this" are in a data cycle
+ }
+ // Check for a unique uncasted input
+ if (uncasted_input == NULL) {
+ uncasted_input = un;
+ } else if (uncasted_input != un) {
+ uncasted_input = NodeSentinel; // no unique uncasted input
+ }
+ // Check for a unique direct input
+ if (direct_input == NULL) {
+ direct_input = n;
+ } else if (direct_input != n) {
+ direct_input = NodeSentinel; // no unique direct input
+ }
+ }
+ if (direct_input == NULL) {
+ return phase->C->top(); // no inputs
+ }
+ assert(uncasted_input != NULL,"");
+
+ if (direct_input != NodeSentinel) {
+ return direct_input; // one unique direct input
+ }
+ if (uncasted_input != NodeSentinel &&
+ phase->type(uncasted_input)->higher_equal(type())) {
+ return uncasted_input; // one unique uncasted input
+ }
+
+ // Nothing.
+ return NULL;
+}
+
+//------------------------------is_x2logic-------------------------------------
+// Check for simple convert-to-boolean pattern
+// If:(C Bool) Region:(IfF IfT) Phi:(Region 0 1)
+ // Convert the Phi to a Conv2B (int-to-boolean conversion).
+static Node *is_x2logic( PhaseGVN *phase, PhiNode *phi, int true_path ) {
+ assert(true_path !=0, "only diamond shape graph expected");
+ // Convert the true/false index into an expected 0/1 return.
+ // Map 2->0 and 1->1.
+ int flipped = 2-true_path;
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ Node *region = phi->in(0);
+ Node *iff = region->in(1)->in(0);
+ BoolNode *b = (BoolNode*)iff->in(1);
+ const CmpNode *cmp = (CmpNode*)b->in(1);
+
+ Node *zero = phi->in(1);
+ Node *one = phi->in(2);
+ const Type *tzero = phase->type( zero );
+ const Type *tone = phase->type( one );
+
+ // Check for compare vs 0
+ const Type *tcmp = phase->type(cmp->in(2));
+ if( tcmp != TypeInt::ZERO && tcmp != TypePtr::NULL_PTR ) {
+ // Allow cmp-vs-1 if the other input is bounded by 0-1
+ if( !(tcmp == TypeInt::ONE && phase->type(cmp->in(1)) == TypeInt::BOOL) )
+ return NULL;
+ flipped = 1-flipped; // Test is vs 1 instead of 0!
+ }
+
+ // Check for setting zero/one opposite expected
+ if( tzero == TypeInt::ZERO ) {
+ if( tone == TypeInt::ONE ) {
+ } else return NULL;
+ } else if( tzero == TypeInt::ONE ) {
+ if( tone == TypeInt::ZERO ) {
+ flipped = 1-flipped;
+ } else return NULL;
+ } else return NULL;
+
+ // Check for boolean test backwards
+ if( b->_test._test == BoolTest::ne ) {
+ } else if( b->_test._test == BoolTest::eq ) {
+ flipped = 1-flipped;
+ } else return NULL;
+
+ // Build int->bool conversion
+ Node *n = new (phase->C, 2) Conv2BNode( cmp->in(1) );
+ if( flipped )
+ n = new (phase->C, 3) XorINode( phase->transform(n), phase->intcon(1) );
+
+ return n;
+}
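+
+// A rough sketch of the pattern handled above (source form illustrative):
+// "flag = (x != 0) ? 1 : 0;" merges the constants 0 and 1 in a diamond Phi,
+// which is replaced here by Conv2B(x); if the sense is inverted, as in
+// "(x == 0) ? 1 : 0", an extra XorI with 1 flips the result.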
+
+//------------------------------is_cond_add------------------------------------
+// Check for simple conditional add pattern: "(P < Q) ? X+Y : X;"
+// To be profitable the control flow has to disappear; there can be no other
+// values merging here. We replace the test-and-branch with:
+// "(sgn(P-Q))&Y) + X". Basically, convert "(P < Q)" into 0 or -1 by
+// moving the carry bit from (P-Q) into a register with 'sbb EAX,EAX'.
+// Then convert Y to 0-or-Y and finally add.
+// This is a key transform for SpecJava _201_compress.
+static Node* is_cond_add(PhaseGVN *phase, PhiNode *phi, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ RegionNode *region = (RegionNode*)phi->in(0);
+ Node *iff = region->in(1)->in(0);
+ BoolNode* b = iff->in(1)->as_Bool();
+ const CmpNode *cmp = (CmpNode*)b->in(1);
+
+ // Make sure only merging this one phi here
+ if (region->has_unique_phi() != phi) return NULL;
+
+ // Make sure each arm of the diamond has exactly one output, which we assume
+ // is the region. Otherwise, the control flow won't disappear.
+ if (region->in(1)->outcnt() != 1) return NULL;
+ if (region->in(2)->outcnt() != 1) return NULL;
+
+ // Check for "(P < Q)" of type signed int
+ if (b->_test._test != BoolTest::lt) return NULL;
+ if (cmp->Opcode() != Op_CmpI) return NULL;
+
+ Node *p = cmp->in(1);
+ Node *q = cmp->in(2);
+ Node *n1 = phi->in( true_path);
+ Node *n2 = phi->in(3-true_path);
+
+ int op = n1->Opcode();
+ if( op != Op_AddI // Need zero as additive identity
+ /*&&op != Op_SubI &&
+ op != Op_AddP &&
+ op != Op_XorI &&
+ op != Op_OrI*/ )
+ return NULL;
+
+ Node *x = n2;
+ Node *y = n1->in(1);
+ if( n2 == n1->in(1) ) {
+ y = n1->in(2);
+ } else if( n2 == n1->in(2) ) { // y stays n1->in(1)
+ } else return NULL;
+
+ // Not so profitable if compare and add are constants
+ if( q->is_Con() && phase->type(q) != TypeInt::ZERO && y->is_Con() )
+ return NULL;
+
+ Node *cmplt = phase->transform( new (phase->C, 3) CmpLTMaskNode(p,q) );
+ Node *j_and = phase->transform( new (phase->C, 3) AndINode(cmplt,y) );
+ return new (phase->C, 3) AddINode(j_and,x);
+}
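+
+// A rough sketch of the rewrite above (names illustrative): for source like
+// "r = (p < q) ? x + y : x;" the diamond disappears and becomes
+//
+//   mask = CmpLTMask(p, q);        // -1 if p < q, else 0
+//   r    = AddI(AndI(mask, y), x);
+//
+// so y contributes only on paths where the mask is all ones.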
+
+//------------------------------is_absolute------------------------------------
+// Check for absolute value.
+static Node* is_absolute( PhaseGVN *phase, PhiNode *phi_root, int true_path) {
+ assert(true_path !=0, "only diamond shape graph expected");
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // ABS ends with the merge of 2 control flow paths.
+ // Find the false path from the true path. With only 2 inputs, 3 - x works nicely.
+ int false_path = 3 - true_path;
+
+ // is_diamond_phi() has guaranteed the correctness of the nodes sequence:
+ // phi->region->if_proj->ifnode->bool->cmp
+ BoolNode *bol = phi_root->in(0)->in(1)->in(0)->in(1)->as_Bool();
+
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = true_path; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = false_path; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = true_path; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = false_path; break;
+ default: return NULL; break;
+ }
+
+ // Test is next
+ Node *cmp = bol->in(1);
+ const Type *tzero = NULL;
+ switch( cmp->Opcode() ) {
+ case Op_CmpF: tzero = TypeF::ZERO; break; // Float ABS
+ case Op_CmpD: tzero = TypeD::ZERO; break; // Double ABS
+ default: return NULL;
+ }
+
+ // Find zero input of compare; the other input is being abs'd
+ Node *x = NULL;
+ bool flip = false;
+ if( phase->type(cmp->in(cmp_zero_idx)) == tzero ) {
+ x = cmp->in(3 - cmp_zero_idx);
+ } else if( phase->type(cmp->in(3 - cmp_zero_idx)) == tzero ) {
+ // The test is inverted, we should invert the result...
+ x = cmp->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // Next get the 2 pieces being selected, one is the original value
+ // and the other is the negated value.
+ if( phi_root->in(phi_x_idx) != x ) return NULL;
+
+ // Check other phi input for subtract node
+ Node *sub = phi_root->in(3 - phi_x_idx);
+
+ // Allow only Sub(0,X) and fail out for all others; Neg is not OK
+ if( tzero == TypeF::ZERO ) {
+ if( sub->Opcode() != Op_SubF ||
+ sub->in(2) != x ||
+ phase->type(sub->in(1)) != tzero ) return NULL;
+ x = new (phase->C, 2) AbsFNode(x);
+ if (flip) {
+ x = new (phase->C, 3) SubFNode(sub->in(1), phase->transform(x));
+ }
+ } else {
+ if( sub->Opcode() != Op_SubD ||
+ sub->in(2) != x ||
+ phase->type(sub->in(1)) != tzero ) return NULL;
+ x = new (phase->C, 2) AbsDNode(x);
+ if (flip) {
+ x = new (phase->C, 3) SubDNode(sub->in(1), phase->transform(x));
+ }
+ }
+
+ return x;
+}
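+
+// A rough sketch of the pattern handled above (source form illustrative):
+// "d = (d < 0.0) ? 0.0 - d : d;" forms a diamond Phi of d and SubD(0,d),
+// which collapses to AbsD(d). The float case maps to AbsF, and when the zero
+// sits on the other compare input the result is negated again by subtracting
+// the absolute value from zero.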
+
+//------------------------------split_once-------------------------------------
+// Helper for split_flow_path
+static void split_once(PhaseIterGVN *igvn, Node *phi, Node *val, Node *n, Node *newn) {
+ igvn->hash_delete(n); // Remove from hash before hacking edges
+
+ uint j = 1;
+ for( uint i = phi->req()-1; i > 0; i-- ) {
+ if( phi->in(i) == val ) { // Found a path with val?
+ // Add to NEW Region/Phi, no DU info
+ newn->set_req( j++, n->in(i) );
+ // Remove from OLD Region/Phi
+ n->del_req(i);
+ }
+ }
+
+ // Register the new node but do not transform it. Cannot transform until the
+ // entire Region/Phi conglomerate has been hacked as a single huge transform.
+ igvn->register_new_node_with_optimizer( newn );
+ // Now I can point to the new node.
+ n->add_req(newn);
+ igvn->_worklist.push(n);
+}
+
+//------------------------------split_flow_path--------------------------------
+// Check for merging identical values and split flow paths
+static Node* split_flow_path(PhaseGVN *phase, PhiNode *phi) {
+ BasicType bt = phi->type()->basic_type();
+ if( bt == T_ILLEGAL || type2size[bt] <= 0 )
+ return NULL; // Bail out on funny non-value stuff
+ if( phi->req() <= 3 ) // Need at least 2 matched inputs and a
+ return NULL; // third unequal input to be worth doing
+
+ // Scan for a constant
+ uint i;
+ for( i = 1; i < phi->req()-1; i++ ) {
+ Node *n = phi->in(i);
+ if( !n ) return NULL;
+ if( phase->type(n) == Type::TOP ) return NULL;
+ if( n->Opcode() == Op_ConP )
+ break;
+ }
+ if( i >= phi->req() ) // Only split for constants
+ return NULL;
+
+ Node *val = phi->in(i); // Constant to split for
+ uint hit = 0; // Number of times it occurs
+
+ for( ; i < phi->req(); i++ ){ // Count occurrences of the constant
+ Node *n = phi->in(i);
+ if( !n ) return NULL;
+ if( phase->type(n) == Type::TOP ) return NULL;
+ if( phi->in(i) == val )
+ hit++;
+ }
+
+ if( hit <= 1 || // Make sure we find 2 or more
+ hit == phi->req()-1 ) // and not ALL the same value
+ return NULL;
+
+ // Now start splitting out the flow paths that merge the same value.
+ // Split first the RegionNode.
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node *r = phi->region();
+ RegionNode *newr = new (phase->C, hit+1) RegionNode(hit+1);
+ split_once(igvn, phi, val, r, newr);
+
+ // Now split all other Phis than this one
+ for (DUIterator_Fast kmax, k = r->fast_outs(kmax); k < kmax; k++) {
+ Node* phi2 = r->fast_out(k);
+ if( phi2->is_Phi() && phi2->as_Phi() != phi ) {
+ PhiNode *newphi = PhiNode::make_blank(newr, phi2);
+ split_once(igvn, phi, val, phi2, newphi);
+ }
+ }
+
+ // Clean up this guy
+ igvn->hash_delete(phi);
+ for( i = phi->req()-1; i > 0; i-- ) {
+ if( phi->in(i) == val ) {
+ phi->del_req(i);
+ }
+ }
+ phi->add_req(val);
+
+ return phi;
+}
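+
+// A rough sketch of the effect above (shape illustrative): given
+//
+//   Region R(c1, c2, c3, c4)    Phi(R, conP, v1, conP, v2)
+//
+// the two paths that merge the same constant are split onto a new Region, and
+// the Phi keeps a single conP input arriving through it:
+//
+//   NewRegion NR(c1, c3)    Region R(c2, c4, NR)    Phi(R, v1, v2, conP)
+//
+// Other phis on R get a fresh blank phi on NR so every user still sees one
+// value per surviving path.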
+
+//=============================================================================
+//------------------------------simple_data_loop_check-------------------------
+ // Try to determine if the phi node is in a simple safe/unsafe data loop.
+// Returns:
+// enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
+ // Safe       - safe case when the phi and its inputs reference only safe data
+ //              nodes;
+ // Unsafe     - the phi and its inputs reference unsafe data nodes but there
+ //              is no reference back to the phi - need a graph walk
+ //              to determine if it is in a loop;
+ // UnsafeLoop - unsafe case when the phi references itself directly or through
+ //              an unsafe data node.
+ // Note: a safe data node is a node which can never reference itself during
+ // GVN transformations. For now it is Con, Proj, Phi, CastPP, CheckCastPP.
+ // I mark Phi nodes as safe not only because they can reference themselves
+ // but also to prevent mistaking the fallthrough case inside an outer loop
+ // for a dead loop when the phi references itself through another phi.
+PhiNode::LoopSafety PhiNode::simple_data_loop_check(Node *in) const {
+ // It is unsafe loop if the phi node references itself directly.
+ if (in == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ // Unsafe loop if the phi node references itself through an unsafe data node.
+ // Exclude cases with null inputs or data nodes which could reference
+ // itself (safe for dead loops).
+ if (in != NULL && !in->is_dead_loop_safe()) {
+ // Check inputs of phi's inputs also.
+ // It is much less expensive than a full graph walk.
+ uint cnt = in->req();
+ for (uint i = 1; i < cnt; ++i) {
+ Node* m = in->in(i);
+ if (m == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ if (m != NULL && !m->is_dead_loop_safe()) {
+ // Check the most common case (about 30% of all cases):
+ // phi->Load/Store->AddP->(ConP ConP Con)/(Parm Parm Con).
+ Node *m1 = (m->is_AddP() && m->req() > 3) ? m->in(1) : NULL;
+ if (m1 == (Node*)this)
+ return UnsafeLoop; // Unsafe loop
+ if (m1 != NULL && m1 == m->in(2) &&
+ m1->is_dead_loop_safe() && m->in(3)->is_Con()) {
+ continue; // Safe case
+ }
+ // The phi references an unsafe node - need full analysis.
+ return Unsafe;
+ }
+ }
+ }
+ return Safe; // Safe case - we can optimize the phi node.
+}
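+
+// Rough examples of the three answers above (shapes illustrative):
+//   the input is the phi itself, or an AddI that uses the phi   -> UnsafeLoop
+//   the input is a Con, Proj, or another Phi                    -> Safe
+//   the input is some other data node with unrecognized inputs  -> Unsafe
+//                                                   (full graph walk needed)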
+
+//------------------------------is_unsafe_data_reference-----------------------
+// If phi can be reached through the data input - it is data loop.
+bool PhiNode::is_unsafe_data_reference(Node *in) const {
+ assert(req() > 1, "");
+ // First, check simple cases when phi references itself directly or
+ // through another node.
+ LoopSafety safety = simple_data_loop_check(in);
+ if (safety == UnsafeLoop)
+ return true; // phi references itself - unsafe loop
+ else if (safety == Safe)
+ return false; // Safe case - phi could be replaced with the unique input.
+
+ // Unsafe case when we should go through data graph to determine
+ // if the phi references itself.
+
+ ResourceMark rm;
+
+ Arena *a = Thread::current()->resource_area();
+ Node_List nstack(a);
+ VectorSet visited(a);
+
+ nstack.push(in); // Start with unique input.
+ visited.set(in->_idx);
+ while (nstack.size() != 0) {
+ Node* n = nstack.pop();
+ uint cnt = n->req();
+ for (uint i = 1; i < cnt; i++) { // Only data paths
+ Node* m = n->in(i);
+ if (m == (Node*)this) {
+ return true; // Data loop
+ }
+ if (m != NULL && !m->is_dead_loop_safe()) { // Only look for unsafe cases.
+ if (!visited.test_set(m->_idx))
+ nstack.push(m);
+ }
+ }
+ }
+ return false; // The phi is not reachable from its inputs
+}
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Must preserve
+// the CFG, but we can still strip out dead paths.
+Node *PhiNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // The following should never happen after the 6297035 fix.
+ if( is_copy() ) // Already degraded to a Copy ?
+ return NULL; // No change
+
+ Node *r = in(0); // RegionNode
+ assert(r->in(0) == NULL || !r->in(0)->is_Root(), "not a specially hidden merge");
+
+ // Note: During parsing, phis are often transformed before their regions.
+ // This means we have to use type_or_null to defend against untyped regions.
+ if( phase->type_or_null(r) == Type::TOP ) // Dead code?
+ return NULL; // No change
+
+ Node *top = phase->C->top();
+
+ // There are 2 situations when only one valid phi input is left
+ // (in addition to the Region input).
+ // One: the region is not a loop - replace the phi with this input.
+ // Two: the region is a loop - replace the phi with top since this data path is dead
+ // and we need to break the dead data loop.
+ Node* progress = NULL; // Record if any progress made
+ for( uint j = 1; j < req(); ++j ){ // For all paths in
+ // Check unreachable control paths
+ Node* rc = r->in(j);
+ Node* n = in(j); // Get the input
+ if (rc == NULL || phase->type(rc) == Type::TOP) {
+ if (n != top) { // Not already top?
+ set_req(j, top); // Nuke it down
+ progress = this; // Record progress
+ }
+ }
+ }
+
+ Node* uin = unique_input(phase);
+ if (uin == top) { // Simplest case: no alive inputs.
+ if (can_reshape) // IGVN transformation
+ return top;
+ else
+ return NULL; // Identity will return TOP
+ } else if (uin != NULL) {
+ // Only one not-NULL unique input path is left.
+ // Determine if this input is backedge of a loop.
+ // (Skip new phis which have no uses, and dead regions.)
+ if( outcnt() > 0 && r->in(0) != NULL ) {
+ // First, take the short cut when we know it is a loop and
+ // the EntryControl data path is dead.
+ assert(!r->is_Loop() || r->req() == 3, "Loop node should have 3 inputs");
+ // Then, check if there is a data loop when phi references itself directly
+ // or through other data nodes.
+ if( (r->is_Loop() && !phase->eqv_uncast(uin, in(LoopNode::EntryControl))) ||
+ (!r->is_Loop() && is_unsafe_data_reference(uin)) ) {
+ // Break this data loop to avoid creation of a dead loop.
+ if (can_reshape) {
+ return top;
+ } else {
+ // We can't return top if we are in the Parse phase - cut inputs only and
+ // let Identity handle the case.
+ replace_edge(uin, top);
+ return NULL;
+ }
+ }
+ }
+
+ // One unique input.
+ debug_only(Node* ident = Identity(phase));
+ // The unique input must eventually be detected by the Identity call.
+#ifdef ASSERT
+ if (ident != uin && !ident->is_top()) {
+ // print this output before failing assert
+ r->dump(3);
+ this->dump(3);
+ ident->dump();
+ uin->dump();
+ }
+#endif
+ assert(ident == uin || ident->is_top(), "Identity must clean this up");
+ return NULL;
+ }
+
+
+ Node* opt = NULL;
+ int true_path = is_diamond_phi();
+ if( true_path != 0 ) {
+ // Check for CMove'ing identity. If it would be unsafe,
+ // handle it here. In the safe case, let Identity handle it.
+ Node* unsafe_id = is_cmove_id(phase, true_path);
+ if( unsafe_id != NULL && is_unsafe_data_reference(unsafe_id) )
+ opt = unsafe_id;
+
+ // Check for simple convert-to-boolean pattern
+ if( opt == NULL )
+ opt = is_x2logic(phase, this, true_path);
+
+ // Check for absolute value
+ if( opt == NULL )
+ opt = is_absolute(phase, this, true_path);
+
+ // Check for conditional add
+ if( opt == NULL && can_reshape )
+ opt = is_cond_add(phase, this, true_path);
+
+ // These 4 optimizations could subsume the phi:
+ // have to check for a dead data loop creation.
+ if( opt != NULL ) {
+ if( opt == unsafe_id || is_unsafe_data_reference(opt) ) {
+ // Found dead loop.
+ if( can_reshape )
+ return top;
+ // We can't return top if we are in Parse phase - cut inputs only
+ // to stop further optimizations for this phi. Identity will return TOP.
+ assert(req() == 3, "only diamond merge phi here");
+ set_req(1, top);
+ set_req(2, top);
+ return NULL;
+ } else {
+ return opt;
+ }
+ }
+ }
+
+ // Check for merging identical values and split flow paths
+ if (can_reshape) {
+ opt = split_flow_path(phase, this);
+ // This optimization only modifies phi - don't need to check for dead loop.
+ assert(opt == NULL || phase->eqv(opt, this), "do not elide phi");
+ if (opt != NULL) return opt;
+ }
+
+ if (in(1) != NULL && in(1)->Opcode() == Op_AddP && can_reshape) {
+ // Try to undo Phi of AddP:
+ // (Phi (AddP base base y) (AddP base2 base2 y))
+ // becomes:
+ // newbase := (Phi base base2)
+ // (AddP newbase newbase y)
+ //
+ // This occurs as a result of unsuccessful split_thru_phi and
+ // interferes with taking advantage of addressing modes. See the
+ // clone_shift_expressions code in matcher.cpp
+ Node* addp = in(1);
+ const Type* type = addp->in(AddPNode::Base)->bottom_type();
+ Node* y = addp->in(AddPNode::Offset);
+ if (y != NULL && addp->in(AddPNode::Base) == addp->in(AddPNode::Address)) {
+ // make sure that all the inputs are similar to the first one,
+ // i.e. AddP with base == address and same offset as first AddP
+ bool doit = true;
+ for (uint i = 2; i < req(); i++) {
+ if (in(i) == NULL ||
+ in(i)->Opcode() != Op_AddP ||
+ in(i)->in(AddPNode::Base) != in(i)->in(AddPNode::Address) ||
+ in(i)->in(AddPNode::Offset) != y) {
+ doit = false;
+ break;
+ }
+ // Accumulate type for resulting Phi
+ type = type->meet(in(i)->in(AddPNode::Base)->bottom_type());
+ }
+ Node* base = NULL;
+ if (doit) {
+ // Check for neighboring AddP nodes in a tree.
+ // If they have a base, use it.
+ for (DUIterator_Fast kmax, k = this->fast_outs(kmax); k < kmax; k++) {
+ Node* u = this->fast_out(k);
+ if (u->is_AddP()) {
+ Node* base2 = u->in(AddPNode::Base);
+ if (base2 != NULL && !base2->is_top()) {
+ if (base == NULL)
+ base = base2;
+ else if (base != base2)
+ { doit = false; break; }
+ }
+ }
+ }
+ }
+ if (doit) {
+ if (base == NULL) {
+ base = new (phase->C, in(0)->req()) PhiNode(in(0), type, NULL);
+ for (uint i = 1; i < req(); i++) {
+ base->init_req(i, in(i)->in(AddPNode::Base));
+ }
+ phase->is_IterGVN()->register_new_node_with_optimizer(base);
+ }
+ return new (phase->C, 4) AddPNode(base, base, y);
+ }
+ }
+ }
+
+ // Split phis through memory merges, so that the memory merges will go away.
+ // Piggy-back this transformation on the search for a unique input....
+ // It will be as if the merged memory is the unique value of the phi.
+ // (Do not attempt this optimization unless parsing is complete.
+ // It would make the parser's memory-merge logic sick.)
+ // (MergeMemNode is not dead_loop_safe - need to check for dead loop.)
+ if (progress == NULL && can_reshape && type() == Type::MEMORY) {
+ // see if this phi should be sliced
+ uint merge_width = 0;
+ bool saw_self = false;
+ for( uint i=1; i<req(); ++i ) {// For all paths in
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ merge_width = MAX2(merge_width, n->req());
+ saw_self = saw_self || phase->eqv(n->base_memory(), this);
+ }
+ }
+
+ // This restriction is temporarily necessary to ensure termination:
+ if (!saw_self && adr_type() == TypePtr::BOTTOM) merge_width = 0;
+
+ if (merge_width > Compile::AliasIdxRaw) {
+ // found at least one non-empty MergeMem
+ const TypePtr* at = adr_type();
+ if (at != TypePtr::BOTTOM) {
+ // Patch the existing phi to select an input from the merge:
+ // Phi:AT1(...MergeMem(m0, m1, m2)...) into
+ // Phi:AT1(...m1...)
+ int alias_idx = phase->C->get_alias_index(at);
+ for (uint i=1; i<req(); ++i) {
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ // compress paths and change unreachable cycles to TOP
+ // Otherwise we could keep updating the input forever along a MergeMem cycle
+ // Equivalent code is in MemNode::Ideal_common
+ Node *m = phase->transform(n);
+ // If transformed to a MergeMem, get the desired slice
+ // Otherwise the returned node represents memory for every slice
+ Node *new_mem = (m->is_MergeMem()) ?
+ m->as_MergeMem()->memory_at(alias_idx) : m;
+ // Update input if it is progress over what we have now
+ if (new_mem != ii) {
+ set_req(i, new_mem);
+ progress = this;
+ }
+ }
+ }
+ } else {
+ // We know that at least one MergeMem->base_memory() == this
+ // (saw_self == true). If all other inputs also reference this phi
+ // (directly or through data nodes) - it is a dead loop.
+ bool saw_safe_input = false;
+ for (uint j = 1; j < req(); ++j) {
+ Node *n = in(j);
+ if (n->is_MergeMem() && n->as_MergeMem()->base_memory() == this)
+ continue; // skip known cases
+ if (!is_unsafe_data_reference(n)) {
+ saw_safe_input = true; // found safe input
+ break;
+ }
+ }
+ if (!saw_safe_input)
+ return top; // all inputs reference back to this phi - dead loop
+
+ // Phi(...MergeMem(m0, m1:AT1, m2:AT2)...) into
+ // MergeMem(Phi(...m0...), Phi:AT1(...m1...), Phi:AT2(...m2...))
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ Node* hook = new (phase->C, 1) Node(1);
+ PhiNode* new_base = (PhiNode*) clone();
+ // Must eagerly register phis, since they participate in loops.
+ if (igvn) {
+ igvn->register_new_node_with_optimizer(new_base);
+ hook->add_req(new_base);
+ }
+ MergeMemNode* result = MergeMemNode::make(phase->C, new_base);
+ for (uint i = 1; i < req(); ++i) {
+ Node *ii = in(i);
+ if (ii->is_MergeMem()) {
+ MergeMemNode* n = ii->as_MergeMem();
+ for (MergeMemStream mms(result, n); mms.next_non_empty2(); ) {
+ // If we have not seen this slice yet, make a phi for it.
+ bool made_new_phi = false;
+ if (mms.is_empty()) {
+ Node* new_phi = new_base->slice_memory(mms.adr_type(phase->C));
+ made_new_phi = true;
+ if (igvn) {
+ igvn->register_new_node_with_optimizer(new_phi);
+ hook->add_req(new_phi);
+ }
+ mms.set_memory(new_phi);
+ }
+ Node* phi = mms.memory();
+ assert(made_new_phi || phi->in(i) == n, "replace the i-th merge by a slice");
+ phi->set_req(i, mms.memory2());
+ }
+ }
+ }
+ // Distribute all self-loops.
+ { // (Extra braces to hide mms.)
+ for (MergeMemStream mms(result); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ for (uint i = 1; i < req(); ++i) {
+ if (phi->in(i) == this) phi->set_req(i, phi);
+ }
+ }
+ }
+ // now transform the new nodes, and return the mergemem
+ for (MergeMemStream mms(result); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ mms.set_memory(phase->transform(phi));
+ }
+ if (igvn) { // Unhook.
+ igvn->hash_delete(hook);
+ for (uint i = 1; i < hook->req(); i++) {
+ hook->set_req(i, NULL);
+ }
+ }
+ // Replace self with the result.
+ return result;
+ }
+ }
+ }
+
+ return progress; // Return any progress
+}
+
+//------------------------------out_RegMask------------------------------------
+const RegMask &PhiNode::in_RegMask(uint i) const {
+ return i ? out_RegMask() : RegMask::Empty;
+}
+
+const RegMask &PhiNode::out_RegMask() const {
+ uint ideal_reg = Matcher::base2reg[_type->base()];
+ assert( ideal_reg != Node::NotAMachineReg, "invalid type at Phi" );
+ if( ideal_reg == 0 ) return RegMask::Empty;
+ return *(Compile::current()->matcher()->idealreg2spillmask[ideal_reg]);
+}
+
+#ifndef PRODUCT
+void PhiNode::dump_spec(outputStream *st) const {
+ TypeNode::dump_spec(st);
+ if (in(0) != NULL &&
+ in(0)->is_CountedLoop() &&
+ in(0)->as_CountedLoop()->phi() == this) {
+ st->print(" #tripcount");
+ }
+}
+#endif
+
+
+//=============================================================================
+const Type *GotoNode::Value( PhaseTransform *phase ) const {
+ // If the input is reachable, then we are executed.
+ // If the input is not reachable, then we are not executed.
+ return phase->type(in(0));
+}
+
+Node *GotoNode::Identity( PhaseTransform *phase ) {
+ return in(0); // Simple copy of incoming control
+}
+
+const RegMask &GotoNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &JumpNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &JProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//=============================================================================
+const RegMask &CProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+
+
+//=============================================================================
+
+uint PCTableNode::hash() const { return Node::hash() + _size; }
+uint PCTableNode::cmp( const Node &n ) const
+{ return _size == ((PCTableNode&)n)._size; }
+
+const Type *PCTableNode::bottom_type() const {
+ const Type** f = TypeTuple::fields(_size);
+ for( uint i = 0; i < _size; i++ ) f[i] = Type::CONTROL;
+ return TypeTuple::make(_size, f);
+}
+
+//------------------------------Value------------------------------------------
+// Compute the type of the PCTableNode. If reachable it is a tuple of
+// Control, otherwise the table targets are not reachable
+const Type *PCTableNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(0)) == Type::CONTROL )
+ return bottom_type();
+ return Type::TOP; // All paths dead? Then so are we
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *PCTableNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//=============================================================================
+uint JumpProjNode::hash() const {
+ return Node::hash() + _dest_bci;
+}
+
+uint JumpProjNode::cmp( const Node &n ) const {
+ return ProjNode::cmp(n) &&
+ _dest_bci == ((JumpProjNode&)n)._dest_bci;
+}
+
+#ifndef PRODUCT
+void JumpProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ st->print("@bci %d ",_dest_bci);
+}
+#endif
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Check for being unreachable, or for coming from a Rethrow. Rethrows cannot
+// have the default "fall_through_index" path.
+const Type *CatchNode::Value( PhaseTransform *phase ) const {
+ // Unreachable? Then so are all paths from here.
+ if( phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ // First assume all paths are reachable
+ const Type** f = TypeTuple::fields(_size);
+ for( uint i = 0; i < _size; i++ ) f[i] = Type::CONTROL;
+ // Identify cases that will always throw an exception
+ // () rethrow call
+ // () virtual or interface call with NULL receiver
+ // () call is a check cast with incompatible arguments
+ if( in(1)->is_Proj() ) {
+ Node *i10 = in(1)->in(0);
+ if( i10->is_Call() ) {
+ CallNode *call = i10->as_Call();
+ // Rethrows always throw exceptions, never return
+ if (call->entry_point() == OptoRuntime::rethrow_stub()) {
+ f[CatchProjNode::fall_through_index] = Type::TOP;
+ } else if( call->req() > TypeFunc::Parms ) {
+ const Type *arg0 = phase->type( call->in(TypeFunc::Parms) );
+ // Check for null receiver to virtual or interface calls
+ if( call->is_CallDynamicJava() &&
+ arg0->higher_equal(TypePtr::NULL_PTR) ) {
+ f[CatchProjNode::fall_through_index] = Type::TOP;
+ }
+ } // End of if not a runtime stub
+ } // End of if have call above me
+ } // End of if slot 1 is a projection
+ return TypeTuple::make(_size, f);
+}
+
+//=============================================================================
+uint CatchProjNode::hash() const {
+ return Node::hash() + _handler_bci;
+}
+
+
+uint CatchProjNode::cmp( const Node &n ) const {
+ return ProjNode::cmp(n) &&
+ _handler_bci == ((CatchProjNode&)n)._handler_bci;
+}
+
+
+//------------------------------Identity---------------------------------------
+// If only 1 target is possible, choose it if it is the main control
+Node *CatchProjNode::Identity( PhaseTransform *phase ) {
+ // If my value is control and no other value is, then treat as ID
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ if (t->field_at(_con) != Type::CONTROL) return this;
+ // If we remove the last CatchProj and elide the Catch/CatchProj, then we
+ // also remove any exception table entry. Thus we must know the call
+ // feeding the Catch will not really throw an exception. This is ok for
+ // the main fall-thru control (happens when we know a call can never throw
+ // an exception) or for "rethrow", because a further optimization will
+ // yank the rethrow (happens when we inline a function that can throw an
+ // exception and the caller has no handler). Not legal, e.g., for passing
+ // a NULL receiver to a v-call, or passing bad types to a slow-check-cast.
+ // These cases MUST throw an exception via the runtime system, so the VM
+ // will be looking for a table entry.
+ Node *proj = in(0)->in(1); // Expect a proj feeding CatchNode
+ CallNode *call;
+ if (_con != TypeFunc::Control && // Bail out if not the main control.
+ !(proj->is_Proj() && // AND NOT a rethrow
+ proj->in(0)->is_Call() &&
+ (call = proj->in(0)->as_Call()) &&
+ call->entry_point() == OptoRuntime::rethrow_stub()))
+ return this;
+
+ // Search for any other path being control
+ for (uint i = 0; i < t->cnt(); i++) {
+ if (i != _con && t->field_at(i) == Type::CONTROL)
+ return this;
+ }
+ // Only my path is possible; I am identity on control to the jump
+ return in(0)->in(0);
+}
+
+
+#ifndef PRODUCT
+void CatchProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ st->print("@bci %d ",_handler_bci);
+}
+#endif
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Check for CreateEx being Identity.
+Node *CreateExNode::Identity( PhaseTransform *phase ) {
+ if( phase->type(in(1)) == Type::TOP ) return in(1);
+ if( phase->type(in(0)) == Type::TOP ) return in(0);
+ // We only come from CatchProj, unless the CatchProj goes away.
+ // If the CatchProj is optimized away, then we just carry the
+ // exception oop through.
+ CallNode *call = in(1)->in(0)->as_Call();
+
+ return ( in(0)->is_CatchProj() && in(0)->in(0)->in(1) == in(1) )
+ ? this
+ : call->in(TypeFunc::Parms);
+}
+
+//=============================================================================
+#ifndef PRODUCT
+void NeverBranchNode::format( PhaseRegAlloc *ra_, outputStream *st) const {
+ st->print("%s", Name());
+}
+#endif
diff --git a/src/share/vm/opto/cfgnode.hpp b/src/share/vm/opto/cfgnode.hpp
new file mode 100644
index 000000000..e01e25258
--- /dev/null
+++ b/src/share/vm/opto/cfgnode.hpp
@@ -0,0 +1,481 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+class Matcher;
+class Node;
+class RegionNode;
+class TypeNode;
+class PhiNode;
+class GotoNode;
+class MultiNode;
+class MultiBranchNode;
+class IfNode;
+class PCTableNode;
+class JumpNode;
+class CatchNode;
+class NeverBranchNode;
+class ProjNode;
+class CProjNode;
+class IfTrueNode;
+class IfFalseNode;
+class CatchProjNode;
+class JProjNode;
+class JumpProjNode;
+class SCMemProjNode;
+class PhaseIdealLoop;
+
+//------------------------------RegionNode-------------------------------------
+// The class of RegionNodes, which can be mapped to basic blocks in the
+// program. Their inputs point to Control sources. PhiNodes (described
+// below) have an input that points to a RegionNode. Merged data inputs to PhiNodes
+// correspond 1-to-1 with RegionNode inputs. The zero input of a PhiNode is
+// the RegionNode, and the zero input of the RegionNode is itself.
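+//
+// Illustrative two-way merge (a sketch of the shape described above):
+//
+//        ctrl1    ctrl2          val1     val2
+//           \      /                \      /
+//      Region(self,c1,c2)      Phi(Region,v1,v2)
+//
+// Region->in(1) pairs with Phi->in(1), Region->in(2) with Phi->in(2);
+// Region->in(0) is the Region itself and Phi->in(0) is the Region.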
+class RegionNode : public Node {
+public:
+ // Node layout (parallels PhiNode):
+ enum { Region, // Generally points to self.
+ Control // Control arcs are [1..len)
+ };
+
+ RegionNode( uint required ) : Node(required) {
+ init_class_id(Class_Region);
+ init_req(0,this);
+ }
+
+ Node* is_copy() const {
+ const Node* r = _in[Region];
+ if (r == NULL)
+ return nonnull_req();
+ return NULL; // not a copy!
+ }
+ PhiNode* has_phi() const; // returns an arbitrary phi user, or NULL
+ PhiNode* has_unique_phi() const; // returns the unique phi user, or NULL
+ // Is this region node unreachable from root?
+ bool is_unreachable_region(PhaseGVN *phase) const;
+ virtual int Opcode() const;
+ virtual bool pinned() const { return (const Node *)in(0) == this; }
+ virtual bool is_CFG () const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const RegMask &out_RegMask() const;
+};
+
+//------------------------------JProjNode--------------------------------------
+// jump projection for node that produces multiple control-flow paths
+class JProjNode : public ProjNode {
+ public:
+ JProjNode( Node* ctrl, uint idx ) : ProjNode(ctrl,idx) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node* is_block_proj() const { return in(0); }
+ virtual const RegMask& out_RegMask() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//------------------------------PhiNode----------------------------------------
+// PhiNodes merge values from different Control paths. Slot 0 points to the
+// controlling RegionNode. Other slots map 1-for-1 with incoming control flow
+// paths to the RegionNode. For speed reasons (to avoid another pass) we
+// can turn PhiNodes into copies in-place by NULL'ing out their RegionNode
+// input in slot 0.
+class PhiNode : public TypeNode {
+ const TypePtr* const _adr_type; // non-null only for Type::MEMORY nodes.
+ // Size is bigger to hold the _adr_type field.
+ virtual uint hash() const; // Check the type
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+ // Determine a unique non-trivial input, if any.
+ // Ignore casts if it helps. Return NULL on failure.
+ Node* unique_input(PhaseTransform *phase);
+ // Determine if CMoveNode::is_cmove_id can be used at this join point.
+ Node* is_cmove_id(PhaseTransform* phase, int true_path);
+
+public:
+ // Node layout (parallels RegionNode):
+ enum { Region, // Control input is the Phi's region.
+ Input // Input values are [1..len)
+ };
+
+ PhiNode( Node *r, const Type *t, const TypePtr* at = NULL )
+ : TypeNode(t,r->req()), _adr_type(at) {
+ init_class_id(Class_Phi);
+ init_req(0, r);
+ verify_adr_type();
+ }
+ // create a new phi with in edges matching r and set (initially) to x
+ static PhiNode* make( Node* r, Node* x );
+ // extra type arguments override the new phi's bottom_type and adr_type
+ static PhiNode* make( Node* r, Node* x, const Type *t, const TypePtr* at = NULL );
+ // create a new phi with narrowed memory type
+ PhiNode* slice_memory(const TypePtr* adr_type) const;
+ // like make(r, x), but does not initialize the in edges to x
+ static PhiNode* make_blank( Node* r, Node* x );
+
+ // Accessors
+ RegionNode* region() const { Node* r = in(Region); assert(!r || r->is_Region(), ""); return (RegionNode*)r; }
+
+ Node* is_copy() const {
+ // The node is a real phi if _in[0] is a Region node.
+ DEBUG_ONLY(const Node* r = _in[Region];)
+ assert(r != NULL && r->is_Region(), "Not valid control");
+ return NULL; // not a copy!
+ }
+
+ // Check for a simple dead loop.
+ enum LoopSafety { Safe = 0, Unsafe, UnsafeLoop };
+ LoopSafety simple_data_loop_check(Node *in) const;
+ // Is it an unsafe data loop? It becomes a dead loop if this phi node is removed.
+ bool is_unsafe_data_reference(Node *in) const;
+ int is_diamond_phi() const;
+ virtual int Opcode() const;
+ virtual bool pinned() const { return in(0) != 0; }
+ virtual const TypePtr *adr_type() const { verify_adr_type(true); return _adr_type; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const RegMask &out_RegMask() const;
+ virtual const RegMask &in_RegMask(uint) const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+#ifdef ASSERT
+ void verify_adr_type(VectorSet& visited, const TypePtr* at) const;
+ void verify_adr_type(bool recursive = false) const;
+#else //ASSERT
+ void verify_adr_type(bool recursive = false) const {}
+#endif //ASSERT
+};
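+
+// Illustrative use of the factory methods above (a sketch only, per the
+// documented semantics of make()): given a RegionNode 'r' with two control
+// inputs, a merged value is typically built as
+//   PhiNode* phi = PhiNode::make(r, default_val); // all data inputs = default_val
+//   phi->set_req(1, val_from_path_1);
+//   phi->set_req(2, val_from_path_2);
+// and then run through GVN; slot 0 remains the Region, per the layout above.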
+
+//------------------------------GotoNode---------------------------------------
+// GotoNodes perform direct branches.
+class GotoNode : public Node {
+public:
+ GotoNode( Node *control ) : Node(control) {
+ init_flags(Flag_is_Goto);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node *is_block_proj() const { return this; }
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const RegMask &out_RegMask() const;
+};
+
+//------------------------------CProjNode--------------------------------------
+// control projection for node that produces multiple control-flow paths
+class CProjNode : public ProjNode {
+public:
+ CProjNode( Node *ctrl, uint idx ) : ProjNode(ctrl,idx) {}
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual const Node *is_block_proj() const { return in(0); }
+ virtual const RegMask &out_RegMask() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//---------------------------MultiBranchNode-----------------------------------
+// This class defines a MultiBranchNode, a MultiNode which yields multiple
+// control values. These are distinguished from other types of MultiNodes,
+// which yield multiple data values and for which control is always and only
+// projection #0.
+class MultiBranchNode : public MultiNode {
+public:
+ MultiBranchNode( uint required ) : MultiNode(required) {
+ init_class_id(Class_MultiBranch);
+ }
+};
+
+//------------------------------IfNode-----------------------------------------
+// Output selected Control, based on a boolean test
+class IfNode : public MultiBranchNode {
+ // Size is bigger to hold the probability field. However, _prob does not
+ // change the semantics so it does not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+public:
+
+ // Degrees of branch prediction probability by order of magnitude:
+ // PROB_UNLIKELY_MAG(N) is a 1 in 1eN chance.
+ // PROB_LIKELY_MAG(N) is 1 - PROB_UNLIKELY_MAG(N).
+#define PROB_UNLIKELY_MAG(N) (1e- ## N ## f)
+#define PROB_LIKELY_MAG(N) (1.0f-PROB_UNLIKELY_MAG(N))
+
+ // Maximum and minimum branch prediction probabilities
+ // 1 in 1,000,000 (magnitude 6)
+ //
+ // Although PROB_NEVER == PROB_MIN and PROB_ALWAYS == PROB_MAX
+ // they are used to distinguish different situations:
+ //
+ // The name PROB_MAX (PROB_MIN) is for probabilities which correspond to
+ // very likely (unlikely) but with a concrete possibility of a rare
+ // contrary case. These constants would be used for pinning
+ // measurements, and as measures for assertions that have high
+ // confidence, but some evidence of occasional failure.
+ //
+ // The name PROB_ALWAYS (PROB_NEVER) is to stand for situations for which
+ // there is no evidence at all that the contrary case has ever occurred.
+
+#define PROB_NEVER PROB_UNLIKELY_MAG(6)
+#define PROB_ALWAYS PROB_LIKELY_MAG(6)
+
+#define PROB_MIN PROB_UNLIKELY_MAG(6)
+#define PROB_MAX PROB_LIKELY_MAG(6)
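+
+ // For example, PROB_UNLIKELY_MAG(6) token-pastes to the literal (1e-6f), so
+ // PROB_NEVER/PROB_MIN are a probability of 1e-6 (1 in 1,000,000) and
+ // PROB_ALWAYS/PROB_MAX are (1.0f - 1e-6f).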
+
+ // Static branch prediction probabilities
+ // 1 in 10 (magnitude 1)
+#define PROB_STATIC_INFREQUENT PROB_UNLIKELY_MAG(1)
+#define PROB_STATIC_FREQUENT PROB_LIKELY_MAG(1)
+
+ // Fair probability 50/50
+#define PROB_FAIR (0.5f)
+
+ // Unknown probability sentinel
+#define PROB_UNKNOWN (-1.0f)
+
+ // Probability "constructors", to distinguish as a probability any manifest
+ // constant without a name
+#define PROB_LIKELY(x) ((float) (x))
+#define PROB_UNLIKELY(x) (1.0f - (float)(x))
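+ // (So PROB_LIKELY(0.9f) is simply 0.9f, while PROB_UNLIKELY(0.9f) evaluates
+ // to about 0.1f; the macros exist to document intent at the use site.)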
+
+ // Other probabilities in use, but without a unique name, are documented
+ // here for lack of a better place:
+ //
+ // 1 in 1000 probabilities (magnitude 3):
+ // threshold for converting to conditional move
+ // likelihood of null check failure if a null HAS been seen before
+ // likelihood of slow path taken in library calls
+ //
+ // 1 in 10,000 probabilities (magnitude 4):
+ // threshold for making an uncommon trap probability more extreme
+ // threshold for making a null check implicit
+ // likelihood of needing a gc if eden top moves during an allocation
+ // likelihood of a predicted call failure
+ //
+ // 1 in 100,000 probabilities (magnitude 5):
+ // threshold for ignoring counts when estimating path frequency
+ // likelihood of FP clipping failure
+ // likelihood of catching an exception from a try block
+ // likelihood of null check failure if a null has NOT been seen before
+ //
+ // Magic manifest probabilities such as 0.83, 0.7, ... can be found in
+ // gen_subtype_check() and catch_inline_exceptions().
+
+ float _prob; // Probability of true path being taken.
+ float _fcnt; // Frequency counter
+ IfNode( Node *control, Node *b, float p, float fcnt )
+ : MultiBranchNode(2), _prob(p), _fcnt(fcnt) {
+ init_class_id(Class_If);
+ init_req(0,control);
+ init_req(1,b);
+ }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; }
+ virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const RegMask &out_RegMask() const;
+ void dominated_by(Node* prev_dom, PhaseIterGVN* igvn);
+ int is_range_check(Node* &range, Node* &index, jint &offset);
+ static Node* up_one_dom(Node* curr, bool linear_only = false);
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class IfTrueNode : public CProjNode {
+public:
+ IfTrueNode( IfNode *ifnode ) : CProjNode(ifnode,1) {
+ init_class_id(Class_IfTrue);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+class IfFalseNode : public CProjNode {
+public:
+ IfFalseNode( IfNode *ifnode ) : CProjNode(ifnode,0) {
+ init_class_id(Class_IfFalse);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+
+//------------------------------PCTableNode------------------------------------
+// Build an indirect branch table. Given a control and a table index,
+// control is passed to the Projection matching the table index. Used to
+// implement switch statements and exception-handling capabilities.
+// Undefined behavior if passed-in index is not inside the table.
+class PCTableNode : public MultiBranchNode {
+ virtual uint hash() const; // Target count; table size
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+public:
+ const uint _size; // Number of targets
+
+ PCTableNode( Node *ctrl, Node *idx, uint size ) : MultiBranchNode(2), _size(size) {
+ init_class_id(Class_PCTable);
+ init_req(0, ctrl);
+ init_req(1, idx);
+ }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const;
+ virtual bool pinned() const { return true; }
+};
+
+//------------------------------JumpNode---------------------------------------
+// Indirect branch. Uses PCTable above to implement a switch statement.
+// It emits as a table load and local branch.
+class JumpNode : public PCTableNode {
+public:
+ JumpNode( Node* control, Node* switch_val, uint size) : PCTableNode(control, switch_val, size) {
+ init_class_id(Class_Jump);
+ }
+ virtual int Opcode() const;
+ virtual const RegMask& out_RegMask() const;
+ virtual const Node* is_block_proj() const { return this; }
+};
+
+class JumpProjNode : public JProjNode {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+ private:
+ const int _dest_bci;
+ const uint _proj_no;
+ const int _switch_val;
+ public:
+ JumpProjNode(Node* jumpnode, uint proj_no, int dest_bci, int switch_val)
+ : JProjNode(jumpnode, proj_no), _dest_bci(dest_bci), _proj_no(proj_no), _switch_val(switch_val) {
+ init_class_id(Class_JumpProj);
+ }
+
+ virtual int Opcode() const;
+ virtual const Type* bottom_type() const { return Type::CONTROL; }
+ int dest_bci() const { return _dest_bci; }
+ int switch_val() const { return _switch_val; }
+ uint proj_no() const { return _proj_no; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CatchNode--------------------------------------
+// Helper node to fork exceptions. "Catch" catches any exceptions thrown by
+// a just-prior call. Looks like a PCTableNode but emits no code - just the
+// table. The table lookup and branch is implemented by RethrowNode.
+class CatchNode : public PCTableNode {
+public:
+ CatchNode( Node *ctrl, Node *idx, uint size ) : PCTableNode(ctrl,idx,size){
+ init_class_id(Class_Catch);
+ }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+// CatchProjNode controls which exception handler is targeted after a call.
+// It is passed in the bci of the target handler, or no_handler_bci in case
+// the projection doesn't lead to an exception handler.
+class CatchProjNode : public CProjNode {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const { return sizeof(*this); }
+
+private:
+ const int _handler_bci;
+
+public:
+ enum {
+ fall_through_index = 0, // the fall through projection index
+ catch_all_index = 1, // the projection index for catch-alls
+ no_handler_bci = -1 // the bci for fall through or catch-all projs
+ };
+
+ CatchProjNode(Node* catchnode, uint proj_no, int handler_bci)
+ : CProjNode(catchnode, proj_no), _handler_bci(handler_bci) {
+ init_class_id(Class_CatchProj);
+ assert(proj_no != fall_through_index || handler_bci < 0, "fall through case must have bci < 0");
+ }
+
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *bottom_type() const { return Type::CONTROL; }
+ int handler_bci() const { return _handler_bci; }
+ bool is_handler_proj() const { return _handler_bci >= 0; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+//---------------------------------CreateExNode--------------------------------
+// Helper node to create the exception coming back from a call
+class CreateExNode : public TypeNode {
+public:
+ CreateExNode(const Type* t, Node* control, Node* i_o) : TypeNode(t, 2) {
+ init_req(0, control);
+ init_req(1, i_o);
+ }
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual bool pinned() const { return true; }
+ uint match_edge(uint idx) const { return 0; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------NeverBranchNode-------------------------------
+// The never-taken branch. Used to give the appearance of exiting infinite
+// loops to those algorithms that like all paths to be reachable. It encodes
+// as empty (emits no code).
+class NeverBranchNode : public MultiBranchNode {
+public:
+ NeverBranchNode( Node *ctrl ) : MultiBranchNode(1) { init_req(0,ctrl); }
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual const Type *bottom_type() const { return TypeTuple::IFBOTH; }
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const { }
+ virtual uint size(PhaseRegAlloc *ra_) const { return 0; }
+#ifndef PRODUCT
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
diff --git a/src/share/vm/opto/chaitin.cpp b/src/share/vm/opto/chaitin.cpp
new file mode 100644
index 000000000..33ca24ba5
--- /dev/null
+++ b/src/share/vm/opto/chaitin.cpp
@@ -0,0 +1,2042 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_chaitin.cpp.incl"
+
+//=============================================================================
+
+#ifndef PRODUCT
+void LRG::dump( ) const {
+ ttyLocker ttyl;
+ tty->print("%d ",num_regs());
+ _mask.dump();
+ if( _msize_valid ) {
+ if( mask_size() == compute_mask_size() ) tty->print(", #%d ",_mask_size);
+ else tty->print(", #!!!_%d_vs_%d ",_mask_size,_mask.Size());
+ } else {
+ tty->print(", #?(%d) ",_mask.Size());
+ }
+
+ tty->print("EffDeg: ");
+ if( _degree_valid ) tty->print( "%d ", _eff_degree );
+ else tty->print("? ");
+
+ if( _def == NodeSentinel ) {
+ tty->print("MultiDef ");
+ if (_defs != NULL) {
+ tty->print("(");
+ for (int i = 0; i < _defs->length(); i++) {
+ tty->print("N%d ", _defs->at(i)->_idx);
+ }
+ tty->print(") ");
+ }
+ }
+ else if( _def == 0 ) tty->print("Dead ");
+ else tty->print("Def: N%d ",_def->_idx);
+
+ tty->print("Cost:%4.2g Area:%4.2g Score:%4.2g ",_cost,_area, score());
+ // Flags
+ if( _is_oop ) tty->print("Oop ");
+ if( _is_float ) tty->print("Float ");
+ if( _was_spilled1 ) tty->print("Spilled ");
+ if( _was_spilled2 ) tty->print("Spilled2 ");
+ if( _direct_conflict ) tty->print("Direct_conflict ");
+ if( _fat_proj ) tty->print("Fat ");
+ if( _was_lo ) tty->print("Lo ");
+ if( _has_copy ) tty->print("Copy ");
+ if( _at_risk ) tty->print("Risk ");
+
+ if( _must_spill ) tty->print("Must_spill ");
+ if( _is_bound ) tty->print("Bound ");
+ if( _msize_valid ) {
+ if( _degree_valid && lo_degree() ) tty->print("Trivial ");
+ }
+
+ tty->cr();
+}
+#endif
+
+//------------------------------score------------------------------------------
+// Compute score from cost and area. Low score is best to spill.
+static double raw_score( double cost, double area ) {
+ return cost - (area*RegisterCostAreaRatio) * 1.52588e-5;
+}
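+
+// (The constant 1.52588e-5 in raw_score() is 1/65536, so the area term is
+// effectively area * RegisterCostAreaRatio / 64K, matching the comment in
+// score() below.)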
+
+double LRG::score() const {
+ // Scale _area by RegisterCostAreaRatio/64K then subtract from cost.
+ // Bigger area lowers score, encourages spilling this live range.
+ // Bigger cost raises score, prevents spilling this live range.
+ // (Note: 1/65536 is the magic constant below; I don't trust the C optimizer
+ // to turn a divide by a constant into a multiply by the reciprocal).
+ double score = raw_score( _cost, _area);
+
+ // Account for area. Basically, LRGs covering large areas are better
+ // to spill because more other LRGs get freed up.
+ if( _area == 0.0 ) // No area? Then no progress to spill
+ return 1e35;
+
+ if( _was_spilled2 ) // If spilled once before, we are unlikely
+ return score + 1e30; // to make progress again.
+
+ if( _cost >= _area*3.0 ) // Tiny area relative to cost
+ return score + 1e17; // Probably no progress to spill
+
+ if( (_cost+_cost) >= _area*3.0 ) // Small area relative to cost
+ return score + 1e10; // Likely no progress to spill
+
+ return score;
+}
+
+//------------------------------LRG_List---------------------------------------
+LRG_List::LRG_List( uint max ) : _cnt(max), _max(max), _lidxs(NEW_RESOURCE_ARRAY(uint,max)) {
+ memset( _lidxs, 0, sizeof(uint)*max );
+}
+
+void LRG_List::extend( uint nidx, uint lidx ) {
+ _nesting.check();
+ if( nidx >= _max ) {
+ uint size = 16;
+ while( size <= nidx ) size <<=1;
+ _lidxs = REALLOC_RESOURCE_ARRAY( uint, _lidxs, _max, size );
+ _max = size;
+ }
+ while( _cnt <= nidx )
+ _lidxs[_cnt++] = 0;
+ _lidxs[nidx] = lidx;
+}
+
+#define NUMBUCKS 3
+
+//------------------------------Chaitin----------------------------------------
+PhaseChaitin::PhaseChaitin(uint unique, PhaseCFG &cfg, Matcher &matcher)
+ : PhaseRegAlloc(unique, cfg, matcher,
+#ifndef PRODUCT
+ print_chaitin_statistics
+#else
+ NULL
+#endif
+ ),
+ _names(unique), _uf_map(unique),
+ _maxlrg(0), _live(0),
+ _spilled_once(Thread::current()->resource_area()),
+ _spilled_twice(Thread::current()->resource_area()),
+ _lo_degree(0), _lo_stk_degree(0), _hi_degree(0), _simplified(0),
+ _oldphi(unique)
+#ifndef PRODUCT
+ , _trace_spilling(TraceSpilling || C->method_has_option("TraceSpilling"))
+#endif
+{
+ NOT_PRODUCT( Compile::TracePhase t3("ctorChaitin", &_t_ctorChaitin, TimeCompiler); )
+ uint i,j;
+ // Build a list of basic blocks, sorted by frequency
+ _blks = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+ // Experiment with sorting strategies to speed compilation
+ double cutoff = BLOCK_FREQUENCY(1.0); // Cutoff for high frequency bucket
+ Block **buckets[NUMBUCKS]; // Array of buckets
+ uint buckcnt[NUMBUCKS]; // Array of bucket counters
+ double buckval[NUMBUCKS]; // Array of bucket value cutoffs
+ for( i = 0; i < NUMBUCKS; i++ ) {
+ buckets[i] = NEW_RESOURCE_ARRAY( Block *, _cfg._num_blocks );
+ buckcnt[i] = 0;
+ // Bump by three orders of magnitude each time
+ cutoff *= 0.001;
+ buckval[i] = cutoff;
+ for( j = 0; j < _cfg._num_blocks; j++ ) {
+ buckets[i][j] = NULL;
+ }
+ }
+ // Sort blocks into buckets
+ for( i = 0; i < _cfg._num_blocks; i++ ) {
+ for( j = 0; j < NUMBUCKS; j++ ) {
+ if( (j == NUMBUCKS-1) || (_cfg._blocks[i]->_freq > buckval[j]) ) {
+ // Assign block to end of list for appropriate bucket
+ buckets[j][buckcnt[j]++] = _cfg._blocks[i];
+ break; // kick out of inner loop
+ }
+ }
+ }
+ // Dump buckets into final block array
+ uint blkcnt = 0;
+ for( i = 0; i < NUMBUCKS; i++ ) {
+ for( j = 0; j < buckcnt[i]; j++ ) {
+ _blks[blkcnt++] = buckets[i][j];
+ }
+ }
+
+ assert(blkcnt == _cfg._num_blocks, "Block array not totally filled");
+}
+
+void PhaseChaitin::Register_Allocate() {
+
+ // Above the OLD FP (and in registers) are the incoming arguments. Stack
+ // slots in this area are called "arg_slots". Above the NEW FP (and in
+ // registers) is the outgoing argument area; above that is the spill/temp
+ // area. These are all "frame_slots". Arg_slots start at the zero
+ // stack_slots and count up to the known arg_size. Frame_slots start at
+ // the stack_slot #arg_size and go up. After allocation I map stack
+ // slots to actual offsets. Stack-slots in the arg_slot area are biased
+ // by the frame_size; stack-slots in the frame_slot area are biased by 0.
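+ //
+ // Overall flow of this routine (a summary of the phases below):
+ //   de_ssa -> liveness + IFG -> aggressive coalesce -> physical IFG ->
+ //   split now if a spill is guaranteed -> conservative coalesce ->
+ //   Simplify/Select (split and retry while anything spills) ->
+ //   post-allocation copy removal -> frame sizing -> fixup_spills.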
+
+ _trip_cnt = 0;
+ _alternate = 0;
+ _matcher._allocation_started = true;
+
+ ResourceArea live_arena; // Arena for liveness & IFG info
+ ResourceMark rm(&live_arena);
+
+ // Need live-ness for the IFG; need the IFG for coalescing. If the
+ // liveness is JUST for coalescing, then I can get some mileage by renaming
+ // all copy-related live ranges low and then using the max copy-related
+ // live range as a cut-off for LIVE and the IFG. In other words, I can
+ // build a subset of LIVE and IFG just for copies.
+ PhaseLive live(_cfg,_names,&live_arena);
+
+ // Need IFG for coalescing and coloring
+ PhaseIFG ifg( &live_arena );
+ _ifg = &ifg;
+
+ if (C->unique() > _names.Size()) _names.extend(C->unique()-1, 0);
+
+ // Come out of SSA world to the Named world. Assign (virtual) registers to
+ // Nodes. Use the same register for all inputs and the output of PhiNodes
+ // - effectively ending SSA form. This requires either coalescing live
+ // ranges or inserting copies. For the moment, we insert "virtual copies"
+ // - we pretend there is a copy prior to each Phi in predecessor blocks.
+ // We will attempt to coalesce such "virtual copies" before we manifest
+ // them for real.
+ de_ssa();
+
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL; // Mark live as being not available
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg); // Empty IFG
+ gather_lrg_masks( false ); // Collect LRG masks
+ live.compute( _maxlrg ); // Compute liveness
+ _live = &live; // Mark LIVE as being available
+ }
+
+ // Base pointers are currently "used" by instructions which define new
+ // derived pointers. This makes base pointers live up to where the
+ // derived pointer is made, but not beyond. Really, they need to be live
+ // across any GC point where the derived value is live. So this code looks
+ // at all the GC points, and "stretches" the live range of any base pointer
+ // to the GC point.
+ if( stretch_base_pointer_live_ranges(&live_arena) ) {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive (sbplr)", &_t_computeLive, TimeCompiler); )
+ // Since some live range stretched, I need to recompute live
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+ gather_lrg_masks( false );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+ // Create the interference graph using virtual copies
+ build_ifg_virtual( ); // Include stack slots this time
+
+ // Aggressive (but pessimistic) copy coalescing.
+ // This pass works on virtual copies. Any virtual copies which are not
+ // coalesced get manifested as actual copies
+ {
+ // The IFG is/was triangular. I am 'squaring it up' so Union can run
+ // faster. Union requires a 'for all' operation which is slow on the
+ // triangular adjacency matrix (quick reminder: the IFG is 'sparse' -
+ // meaning I can visit all the Nodes neighbors less than a Node in time
+ // O(# of neighbors), but I have to visit all the Nodes greater than a
+ // given Node and search them for an instance, i.e., time O(#MaxLRG)).
+ _ifg->SquareUp();
+
+ PhaseAggressiveCoalesce coalesce( *this );
+ coalesce.coalesce_driver( );
+ // Insert un-coalesced copies. Visit all Phis. Where inputs to a Phi do
+ // not match the Phi itself, insert a copy.
+ coalesce.insert_copies(_matcher);
+ }
+
+ // After aggressive coalesce, attempt a first cut at coloring.
+ // To color, we need the IFG and for that we need LIVE.
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+ gather_lrg_masks( true );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+
+ // Build physical interference graph
+ uint must_spill = 0;
+ must_spill = build_ifg_physical( &live_arena );
+ // If we have a guaranteed spill, might as well spill now
+ if( must_spill ) {
+ if( !_maxlrg ) return;
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit)
+ C->check_node_count(10*must_spill, "out of nodes before split");
+ if (C->failing()) return;
+ _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
+ // or we failed to split
+ C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after physical split");
+ if (C->failing()) return;
+
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ _cfg.verify();
+ verify_base_ptrs(&live_arena);
+ }
+#endif
+ NOT_PRODUCT( C->verify_graph_edges(); )
+
+ compact(); // Compact LRGs; return new lower max lrg
+
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg); // Build a new interference graph
+ gather_lrg_masks( true ); // Collect intersect mask
+ live.compute( _maxlrg ); // Compute LIVE
+ _live = &live;
+ }
+ build_ifg_physical( &live_arena );
+ _ifg->SquareUp();
+ _ifg->Compute_Effective_Degree();
+ // Only do conservative coalescing if requested
+ if( OptoCoalesce ) {
+ // Conservative (and pessimistic) copy coalescing of those spills
+ PhaseConservativeCoalesce coalesce( *this );
+ // If max live ranges greater than cutoff, don't color the stack.
+ // This cutoff can be larger than below since it is only done once.
+ coalesce.coalesce_driver( );
+ }
+ compress_uf_map_for_nodes();
+
+#ifdef ASSERT
+ if( VerifyOpto ) _ifg->verify(this);
+#endif
+ } else {
+ ifg.SquareUp();
+ ifg.Compute_Effective_Degree();
+#ifdef ASSERT
+ set_was_low();
+#endif
+ }
+
+ // Prepare for Simplify & Select
+ cache_lrg_info(); // Count degree of LRGs
+
+ // Simplify the InterFerence Graph by removing LRGs of low degree.
+ // LRGs of low degree are trivially colorable.
+ Simplify();
+
+ // Select colors by re-inserting LRGs back into the IFG in reverse order.
+ // Return whether or not something spills.
+ uint spills = Select( );
+
+ // If we spill, split and recycle the entire thing
+ while( spills ) {
+ if( _trip_cnt++ > 24 ) {
+ DEBUG_ONLY( dump_for_spill_split_recycle(); )
+ if( _trip_cnt > 27 ) {
+ C->record_method_not_compilable("failed spill-split-recycle sanity check");
+ return;
+ }
+ }
+
+ if( !_maxlrg ) return;
+ _maxlrg = Split( _maxlrg ); // Split spilling LRG everywhere
+ // Bail out if unique gets too large (ie - unique > MaxNodeLimit - 2*NodeLimitFudgeFactor)
+ C->check_node_count(2*NodeLimitFudgeFactor, "out of nodes after split");
+ if (C->failing()) return;
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ _cfg.verify();
+ verify_base_ptrs(&live_arena);
+ }
+#endif
+
+ compact(); // Compact LRGs; return new lower max lrg
+
+ // Nuke the live-ness and interference graph and LiveRanGe info
+ {
+ NOT_PRODUCT( Compile::TracePhase t3("computeLive", &_t_computeLive, TimeCompiler); )
+ _live = NULL;
+ rm.reset_to_mark(); // Reclaim working storage
+ IndexSet::reset_memory(C, &live_arena);
+ ifg.init(_maxlrg);
+
+ // Create LiveRanGe array.
+ // Intersect register masks for all USEs and DEFs
+ gather_lrg_masks( true );
+ live.compute( _maxlrg );
+ _live = &live;
+ }
+ must_spill = build_ifg_physical( &live_arena );
+ _ifg->SquareUp();
+ _ifg->Compute_Effective_Degree();
+
+ // Only do conservative coalescing if requested
+ if( OptoCoalesce ) {
+ // Conservative (and pessimistic) copy coalescing
+ PhaseConservativeCoalesce coalesce( *this );
+ // Checking for few live ranges determines how aggressive the coalesce is.
+ coalesce.coalesce_driver( );
+ }
+ compress_uf_map_for_nodes();
+#ifdef ASSERT
+ if( VerifyOpto ) _ifg->verify(this);
+#endif
+ cache_lrg_info(); // Count degree of LRGs
+
+ // Simplify the InterFerence Graph by removing LRGs of low degree.
+ // LRGs of low degree are trivially colorable.
+ Simplify();
+
+ // Select colors by re-inserting LRGs back into the IFG in reverse order.
+ // Return whether or not something spills.
+ spills = Select( );
+ }
+
+ // Count number of Simplify-Select trips per coloring success.
+ _allocator_attempts += _trip_cnt + 1;
+ _allocator_successes += 1;
+
+ // Peephole remove copies
+ post_allocate_copy_removal();
+
+ // max_reg is past the largest *register* used.
+ // Convert that to a frame_slot number.
+ if( _max_reg <= _matcher._new_SP )
+ _framesize = C->out_preserve_stack_slots();
+ else _framesize = _max_reg -_matcher._new_SP;
+ assert((int)(_matcher._new_SP+_framesize) >= (int)_matcher._out_arg_limit, "framesize must be large enough");
+
+ // This frame must preserve the required fp alignment
+ const int stack_alignment_in_words = Matcher::stack_alignment_in_slots();
+ if (stack_alignment_in_words > 0)
+ _framesize = round_to(_framesize, Matcher::stack_alignment_in_bytes());
+ assert( _framesize >= 0 && _framesize <= 1000000, "sanity check" );
+#ifndef PRODUCT
+ _total_framesize += _framesize;
+ if( (int)_framesize > _max_framesize )
+ _max_framesize = _framesize;
+#endif
+
+ // Convert CISC spills
+ fixup_spills();
+
+ // Log regalloc results
+ CompileLog* log = Compile::current()->log();
+ if (log != NULL) {
+ log->elem("regalloc attempts='%d' success='%d'", _trip_cnt, !C->failing());
+ }
+
+ if (C->failing()) return;
+
+ NOT_PRODUCT( C->verify_graph_edges(); )
+
+ // Move important info out of the live_arena to longer lasting storage.
+ alloc_node_regs(_names.Size());
+ for( uint i=0; i < _names.Size(); i++ ) {
+ if( _names[i] ) { // Live range associated with Node?
+ LRG &lrg = lrgs( _names[i] );
+ if( lrg.num_regs() == 1 ) {
+ _node_regs[i].set1( lrg.reg() );
+ } else { // Must be a register-pair
+ if( !lrg._fat_proj ) { // Must be aligned adjacent register pair
+ // Live ranges record the highest register in their mask.
+ // We want the low register for the AD file writer's convenience.
+ _node_regs[i].set2( OptoReg::add(lrg.reg(),-1) );
+ } else { // Misaligned; extract 2 bits
+ OptoReg::Name hi = lrg.reg(); // Get hi register
+ lrg.Remove(hi); // Yank from mask
+ int lo = lrg.mask().find_first_elem(); // Find lo
+ _node_regs[i].set_pair( hi, lo );
+ }
+ }
+ if( lrg._is_oop ) _node_oops.set(i);
+ } else {
+ _node_regs[i].set_bad();
+ }
+ }
+
+ // Done!
+ _live = NULL;
+ _ifg = NULL;
+ C->set_indexSet_arena(NULL); // ResourceArea is at end of scope
+}
+
+//------------------------------de_ssa-----------------------------------------
+void PhaseChaitin::de_ssa() {
+ // Set initial Names for all Nodes. Most Nodes get the virtual register
+ // number. A few get the ZERO live range number. These do not
+ // get allocated, but instead rely on correct scheduling to ensure that
+ // only one instance is simultaneously live at a time.
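+ // (Concretely, the loop below assigns live range zero exactly to nodes whose
+ // out_RegMask() is empty; every other node gets a fresh virtual register.)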
+ uint lr_counter = 1;
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ uint cnt = b->_nodes.size();
+
+ // Handle all the normal Nodes in the block
+ for( uint j = 0; j < cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ // Pre-color to the zero live range, or pick virtual register
+ const RegMask &rm = n->out_RegMask();
+ _names.map( n->_idx, rm.is_NotEmpty() ? lr_counter++ : 0 );
+ }
+ }
+ // Reset the Union-Find mapping to be identity
+ reset_uf_map(lr_counter);
+}
+
+
+//------------------------------gather_lrg_masks-------------------------------
+// Gather LiveRanGe information, including register masks. Modification of
+// cisc spillable in_RegMasks should not be done before AggressiveCoalesce.
+void PhaseChaitin::gather_lrg_masks( bool after_aggressive ) {
+
+ // Nail down the frame pointer live range
+ uint fp_lrg = n2lidx(_cfg._root->in(1)->in(TypeFunc::FramePtr));
+ lrgs(fp_lrg)._cost += 1e12; // Cost is infinite
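+ // (A low score marks a live range as the preferred spill candidate and cost
+ // raises the score, so this huge cost keeps the frame pointer from ever
+ // being chosen for spilling.)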
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+
+ // For all instructions
+ for( uint j = 1; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+ uint input_edge_start = 1; // Skip control for most nodes
+ if( n->is_Mach() ) input_edge_start = n->as_Mach()->oper_input_base();
+ uint idx = n->is_Copy();
+
+ // Get virtual register number, same as LiveRanGe index
+ uint vreg = n2lidx(n);
+ LRG &lrg = lrgs(vreg);
+ if( vreg ) { // No vreg means un-allocable (e.g. memory)
+
+ // Collect has-copy bit
+ if( idx ) {
+ lrg._has_copy = 1;
+ uint clidx = n2lidx(n->in(idx));
+ LRG &copy_src = lrgs(clidx);
+ copy_src._has_copy = 1;
+ }
+
+ // Check for float-vs-int live range (used in register-pressure
+ // calculations)
+ const Type *n_type = n->bottom_type();
+ if( n_type->is_floatingpoint() )
+ lrg._is_float = 1;
+
+ // Check for twice prior spilling. Once prior spilling might have
+ // spilled 'soft', 2nd prior spill should have spilled 'hard' and
+ // further spilling is unlikely to make progress.
+ if( _spilled_once.test(n->_idx) ) {
+ lrg._was_spilled1 = 1;
+ if( _spilled_twice.test(n->_idx) )
+ lrg._was_spilled2 = 1;
+ }
+
+#ifndef PRODUCT
+ if (trace_spilling() && lrg._def != NULL) {
+ // collect defs for MultiDef printing
+ if (lrg._defs == NULL) {
+ lrg._defs = new (_ifg->_arena) GrowableArray<Node*>();
+ lrg._defs->append(lrg._def);
+ }
+ lrg._defs->append(n);
+ }
+#endif
+
+ // Check for a single def LRG; these can spill nicely
+ // via rematerialization. Flag as NULL for no def found
+ // yet, or 'n' for single def or NodeSentinel for many defs.
+ lrg._def = lrg._def ? NodeSentinel : n;
+
+ // Limit result register mask to acceptable registers
+ const RegMask &rm = n->out_RegMask();
+ lrg.AND( rm );
+ // Check for bound register masks
+ const RegMask &lrgmask = lrg.mask();
+ if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ lrg._is_bound = 1;
+
+ // Check for maximum frequency value
+ if( lrg._maxfreq < b->_freq )
+ lrg._maxfreq = b->_freq;
+
+ int ireg = n->ideal_reg();
+ assert( !n->bottom_type()->isa_oop_ptr() || ireg == Op_RegP,
+ "oops must be in Op_RegP's" );
+ // Check for oop-iness, or long/double
+ // Check for multi-kill projection
+ switch( ireg ) {
+ case MachProjNode::fat_proj:
+ // Fat projections have size equal to number of registers killed
+ lrg.set_num_regs(rm.Size());
+ lrg.set_reg_pressure(lrg.num_regs());
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ break;
+ case Op_RegP:
+#ifdef _LP64
+ lrg.set_num_regs(2); // Size is 2 stack words
+#else
+ lrg.set_num_regs(1); // Size is 1 stack word
+#endif
+ // Register pressure is tracked relative to the maximum values
+ // suggested for that platform, INTPRESSURE and FLOATPRESSURE,
+ // and relative to other types which compete for the same regs.
+ //
+ // The following table contains suggested values based on the
+ // architectures as defined in each .ad file.
+ // INTPRESSURE and FLOATPRESSURE may be tuned differently for
+ // compile-speed or performance.
+ // Note1:
+ // SPARC and SPARCV9 reg_pressures are at 2 instead of 1
+ // since .ad registers are defined as high and low halves.
+ // These reg_pressure values remain compatible with the code
+ // in is_high_pressure() which relates get_invalid_mask_size(),
+ // Block::_reg_pressure and INTPRESSURE, FLOATPRESSURE.
+ // Note2:
+ // SPARC -d32 has 24 registers available for integral values,
+ // but only 10 of these are safe for 64-bit longs.
+ // Using set_reg_pressure(2) for both int and long means
+ // the allocator will believe it can fit 26 longs into
+ // registers. Using 2 for longs and 1 for ints means the
+ // allocator will attempt to put 52 integers into registers.
+ // The settings below limit this problem to methods with
+ // many long values which are being run on 32-bit SPARC.
+ //
+ // ------------------- reg_pressure --------------------
+ // Each entry is reg_pressure_per_value,number_of_regs
+ // RegL RegI RegFlags RegF RegD INTPRESSURE FLOATPRESSURE
+ // IA32 2 1 1 1 1 6 6
+ // IA64 1 1 1 1 1 50 41
+ // SPARC 2 2 2 2 2 48 (24) 52 (26)
+ // SPARCV9 2 2 2 2 2 48 (24) 52 (26)
+ // AMD64 1 1 1 1 1 14 15
+ // -----------------------------------------------------
+#if defined(SPARC)
+ lrg.set_reg_pressure(2); // use for v9 as well
+#else
+ lrg.set_reg_pressure(1); // normally one value per register
+#endif
+ if( n_type->isa_oop_ptr() ) {
+ lrg._is_oop = 1;
+ }
+ break;
+ case Op_RegL: // Check for long or double
+ case Op_RegD:
+ lrg.set_num_regs(2);
+ // Define platform specific register pressure
+#ifdef SPARC
+ lrg.set_reg_pressure(2);
+#elif defined(IA32)
+ if( ireg == Op_RegL ) {
+ lrg.set_reg_pressure(2);
+ } else {
+ lrg.set_reg_pressure(1);
+ }
+#else
+ lrg.set_reg_pressure(1); // normally one value per register
+#endif
+ // If this def of a double forces a mis-aligned double,
+ // flag as '_fat_proj' - really flag as allowing misalignment
+ // AND changes how we count interferences. A mis-aligned
+ // double can interfere with TWO aligned pairs, or effectively
+ // FOUR registers!
+ if( rm.is_misaligned_Pair() ) {
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ }
+ break;
+ case Op_RegF:
+ case Op_RegI:
+ case Op_RegFlags:
+ case 0: // not an ideal register
+ lrg.set_num_regs(1);
+#ifdef SPARC
+ lrg.set_reg_pressure(2);
+#else
+ lrg.set_reg_pressure(1);
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // Now do the same for inputs
+ uint cnt = n->req();
+ // Setup for CISC SPILLING
+ uint inp = (uint)AdlcVMDeps::Not_cisc_spillable;
+ if( UseCISCSpill && after_aggressive ) {
+ inp = n->cisc_operand();
+ if( inp != (uint)AdlcVMDeps::Not_cisc_spillable )
+ // Convert operand number to edge index number
+ inp = n->as_Mach()->operand_index(inp);
+ }
+ // Prepare register mask for each input
+ for( uint k = input_edge_start; k < cnt; k++ ) {
+ uint vreg = n2lidx(n->in(k));
+ if( !vreg ) continue;
+
+ // If this instruction is CISC Spillable, add the flags
+ // bit to its appropriate input
+ if( UseCISCSpill && after_aggressive && inp == k ) {
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" use_cisc_RegMask: ");
+ n->dump();
+ }
+#endif
+ n->as_Mach()->use_cisc_RegMask();
+ }
+
+ LRG &lrg = lrgs(vreg);
+ // // Testing for floating point code shape
+ // Node *test = n->in(k);
+ // if( test->is_Mach() ) {
+ // MachNode *m = test->as_Mach();
+ // int op = m->ideal_Opcode();
+ // if (n->is_Call() && (op == Op_AddF || op == Op_MulF) ) {
+ // int zzz = 1;
+ // }
+ // }
+
+ // Limit result register mask to acceptable registers.
+ // Do not limit registers from uncommon uses before
+ // AggressiveCoalesce. This effectively pre-virtual-splits
+ // around uncommon uses of common defs.
+ const RegMask &rm = n->in_RegMask(k);
+ if( !after_aggressive &&
+ _cfg._bbs[n->in(k)->_idx]->_freq > 1000*b->_freq ) {
+ // Since we are BEFORE aggressive coalesce, leave the register
+ // mask untrimmed by the call. This encourages more coalescing.
+ // Later, AFTER aggressive, this live range will have to spill
+ // but the spiller handles slow-path calls very nicely.
+ } else {
+ lrg.AND( rm );
+ }
+ // Check for bound register masks
+ const RegMask &lrgmask = lrg.mask();
+ if( lrgmask.is_bound1() || lrgmask.is_bound2() )
+ lrg._is_bound = 1;
+ // If this use of a double forces a mis-aligned double,
+ // flag as '_fat_proj' - really flag as allowing misalignment
+ // AND changes how we count interferences. A mis-aligned
+ // double can interfere with TWO aligned pairs, or effectively
+ // FOUR registers!
+ if( lrg.num_regs() == 2 && !lrg._fat_proj && rm.is_misaligned_Pair() ) {
+ lrg._fat_proj = 1;
+ lrg._is_bound = 1;
+ }
+ // if the LRG is an unaligned pair, we will have to spill
+ // so clear the LRG's register mask if it is not already spilled
+ if ( !n->is_SpillCopy() &&
+ (lrg._def == NULL || lrg._def == NodeSentinel || !lrg._def->is_SpillCopy()) &&
+ lrgmask.is_misaligned_Pair()) {
+ lrg.Clear();
+ }
+
+ // Check for maximum frequency value
+ if( lrg._maxfreq < b->_freq )
+ lrg._maxfreq = b->_freq;
+
+ } // End for all allocated inputs
+ } // end for all instructions
+ } // end for all blocks
+
+ // Final per-liverange setup
+ for( uint i2=0; i2<_maxlrg; i2++ ) {
+ LRG &lrg = lrgs(i2);
+ if( lrg.num_regs() == 2 && !lrg._fat_proj )
+ lrg.ClearToPairs();
+ lrg.compute_set_mask_size();
+ if( lrg.not_free() ) { // Handle case where we lose from the start
+ lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
+ lrg._direct_conflict = 1;
+ }
+ lrg.set_degree(0); // no neighbors in IFG yet
+ }
+}
+
+//------------------------------set_was_low------------------------------------
+// Set the was-lo-degree bit. Conservative coalescing should not change the
+// colorability of the graph. If any live range was of low-degree before
+// coalescing, it should Simplify. This call sets the was-lo-degree bit.
+// The bit is checked in Simplify.
+void PhaseChaitin::set_was_low() {
+#ifdef ASSERT
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ int size = lrgs(i).num_regs();
+ uint old_was_lo = lrgs(i)._was_lo;
+ lrgs(i)._was_lo = 0;
+ if( lrgs(i).lo_degree() ) {
+ lrgs(i)._was_lo = 1; // Trivially of low degree
+ } else { // Else check the Briggs assertion
+ // Briggs' observation is that the lo-degree neighbors of a
+ // hi-degree live range will not interfere with the color choices
+ // of said hi-degree live range. The Simplify reverse-stack-coloring
+ // order takes care of the details. Hence you do not have to count
+ // low-degree neighbors when determining if this guy colors.
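+ // (Concretely, the loop below sums MAX2(this range's size, neighbor's size)
+ // over the hi-degree neighbors only; if that sum stays below this range's
+ // degrees of freedom, a color is still guaranteed, so _was_lo is set.)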
+ int briggs_degree = 0;
+ IndexSet *s = _ifg->neighbors(i);
+ IndexSetIterator elements(s);
+ uint lidx;
+ while((lidx = elements.next()) != 0) {
+ if( !lrgs(lidx).lo_degree() )
+ briggs_degree += MAX2(size,lrgs(lidx).num_regs());
+ }
+ if( briggs_degree < lrgs(i).degrees_of_freedom() )
+ lrgs(i)._was_lo = 1; // Low degree via the briggs assertion
+ }
+ assert(old_was_lo <= lrgs(i)._was_lo, "_was_lo may not decrease");
+ }
+#endif
+}
+
+#define REGISTER_CONSTRAINED 16
+
+//------------------------------cache_lrg_info---------------------------------
+// Compute cost/area ratio, in case we spill. Build the lo-degree list.
+void PhaseChaitin::cache_lrg_info( ) {
+
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ LRG &lrg = lrgs(i);
+
+ // Check for being of low degree: means we can be trivially colored.
+ // Low degree, dead or must-spill guys just get to simplify right away
+ if( lrg.lo_degree() ||
+ !lrg.alive() ||
+ lrg._must_spill ) {
+ // Split low degree list into those guys that must get a
+ // register and those that can go to register or stack.
+ // The idea is LRGs that can go register or stack color first when
+ // they have a good chance of getting a register. The register-only
+ // lo-degree live ranges always get a register.
+ OptoReg::Name hi_reg = lrg.mask().find_last_elem();
+ if( OptoReg::is_stack(hi_reg)) { // Can go to stack?
+ lrg._next = _lo_stk_degree;
+ _lo_stk_degree = i;
+ } else {
+ lrg._next = _lo_degree;
+ _lo_degree = i;
+ }
+ } else { // Else high degree
+ lrgs(_hi_degree)._prev = i;
+ lrg._next = _hi_degree;
+ lrg._prev = 0;
+ _hi_degree = i;
+ }
+ }
+}
+
+//------------------------------Pre-Simplify-----------------------------------
+// Simplify the IFG by removing LRGs of low degree that have NO copies
+void PhaseChaitin::Pre_Simplify( ) {
+
+ // Warm up the lo-degree no-copy list
+ int lo_no_copy = 0;
+ for( uint i = 1; i < _maxlrg; i++ ) {
+ if( (lrgs(i).lo_degree() && !lrgs(i)._has_copy) ||
+ !lrgs(i).alive() ||
+ lrgs(i)._must_spill ) {
+ lrgs(i)._next = lo_no_copy;
+ lo_no_copy = i;
+ }
+ }
+
+ while( lo_no_copy ) {
+ uint lo = lo_no_copy;
+ lo_no_copy = lrgs(lo)._next;
+ int size = lrgs(lo).num_regs();
+
+ // Put the simplified guy on the simplified list.
+ lrgs(lo)._next = _simplified;
+ _simplified = lo;
+
+ // Yank this guy from the IFG.
+ IndexSet *adj = _ifg->remove_node( lo );
+
+ // If any neighbors' degrees fall below their number of
+ // allowed registers, then put that neighbor on the low degree
+ // list. Note that 'degree' can only fall and 'numregs' is
+ // unchanged by this action. Thus the two are equal at most once,
+ // so LRGs hit the lo-degree worklists at most once.
+ IndexSetIterator elements(adj);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ LRG *n = &lrgs(neighbor);
+ assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
+
+ // Check for just becoming of-low-degree
+ if( n->just_lo_degree() && !n->_has_copy ) {
+ assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
+ // Put on lo-degree list
+ n->_next = lo_no_copy;
+ lo_no_copy = neighbor;
+ }
+ }
+ } // End of while lo-degree no_copy worklist not empty
+
+ // No more lo-degree no-copy live ranges to simplify
+}
+
+//------------------------------Simplify---------------------------------------
+// Simplify the IFG by removing LRGs of low degree.
+void PhaseChaitin::Simplify( ) {
+
+ while( 1 ) { // Repeat till simplified it all
+ // May want to explore simplifying lo_degree before _lo_stk_degree.
+ // This might result in more spills coloring into registers during
+ // Select().
+ while( _lo_degree || _lo_stk_degree ) {
+ // Pull from the plain lo-degree list first, if possible; else from lo_stk
+ uint lo;
+ if( _lo_degree ) {
+ lo = _lo_degree;
+ _lo_degree = lrgs(lo)._next;
+ } else {
+ lo = _lo_stk_degree;
+ _lo_stk_degree = lrgs(lo)._next;
+ }
+
+ // Put the simplified guy on the simplified list.
+ lrgs(lo)._next = _simplified;
+ _simplified = lo;
+ // If this guy is "at risk" then mark his current neighbors
+ if( lrgs(lo)._at_risk ) {
+ IndexSetIterator elements(_ifg->neighbors(lo));
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ lrgs(datum)._risk_bias = lo;
+ }
+ }
+
+ // Yank this guy from the IFG.
+ IndexSet *adj = _ifg->remove_node( lo );
+
+ // If any neighbors' degrees fall below their number of
+ // allowed registers, then put that neighbor on the low degree
+ // list. Note that 'degree' can only fall and 'numregs' is
+ // unchanged by this action. Thus the two are equal at most once,
+ // so LRGs hit the lo-degree worklist at most once.
+ IndexSetIterator elements(adj);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ LRG *n = &lrgs(neighbor);
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ assert( _ifg->effective_degree(neighbor) == n->degree(), "" );
+ }
+#endif
+
+ // Check for just becoming of-low-degree just counting registers.
+ // _must_spill live ranges are already on the low degree list.
+ if( n->just_lo_degree() && !n->_must_spill ) {
+ assert(!(*_ifg->_yanked)[neighbor],"Cannot move to lo degree twice");
+ // Pull from hi-degree list
+ uint prev = n->_prev;
+ uint next = n->_next;
+ if( prev ) lrgs(prev)._next = next;
+ else _hi_degree = next;
+ lrgs(next)._prev = prev;
+ n->_next = _lo_degree;
+ _lo_degree = neighbor;
+ }
+ }
+ } // End of while lo-degree/lo_stk_degree worklist not empty
+
+ // Check whether we have simplified everything: is the hi-degree list empty?
+ if( !_hi_degree ) break;
+
+ // Time to pick a potential spill guy
+ uint lo_score = _hi_degree;
+ double score = lrgs(lo_score).score();
+ double area = lrgs(lo_score)._area;
+
+ // Find cheapest guy
+ debug_only( int lo_no_simplify=0; );
+ for( uint i = _hi_degree; i; i = lrgs(i)._next ) {
+ assert( !(*_ifg->_yanked)[i], "" );
+ // It's just vaguely possible to move hi-degree to lo-degree without
+ // going through a just-lo-degree stage: If you remove a double from
+ // a float live range, its degree will drop by 2 and you can skip the
+ // just-lo-degree stage. It's very rare (shows up after 5000+ methods
+ // in -Xcomp of Java2Demo). So just choose this guy to simplify next.
+ if( lrgs(i).lo_degree() ) {
+ lo_score = i;
+ break;
+ }
+ debug_only( if( lrgs(i)._was_lo ) lo_no_simplify=i; );
+ double iscore = lrgs(i).score();
+ double iarea = lrgs(i)._area;
+
+ // Compare cost/area of i vs cost/area of lo_score. Smaller cost/area
+ // wins. Ties happen because all live ranges in question have spilled
+ // a few times before and the spill-score adds a huge number which
+ // washes out the low order bits. We are choosing the lesser of 2
+ // evils; in this case pick largest area to spill.
+ if( iscore < score ||
+ (iscore == score && iarea > area && lrgs(lo_score)._was_spilled2) ) {
+ lo_score = i;
+ score = iscore;
+ area = iarea;
+ }
+ }
+ LRG *lo_lrg = &lrgs(lo_score);
+ // The live range we choose for spilling is either hi-degree, or very
+ // rarely it can be low-degree. If we choose a hi-degree live range
+ // there better not be any lo-degree choices.
+ assert( lo_lrg->lo_degree() || !lo_no_simplify, "Live range was lo-degree before coalesce; should simplify" );
+
+ // Pull from hi-degree list
+ uint prev = lo_lrg->_prev;
+ uint next = lo_lrg->_next;
+ if( prev ) lrgs(prev)._next = next;
+ else _hi_degree = next;
+ lrgs(next)._prev = prev;
+ // Jam him on the lo-degree list, despite his high degree.
+ // Maybe he'll get a color, and maybe he'll spill.
+ // Only Select() will know.
+ lrgs(lo_score)._at_risk = true;
+ _lo_degree = lo_score;
+ lo_lrg->_next = 0;
+
+ } // End of while not simplified everything
+
+}
+
+//------------------------------bias_color-------------------------------------
+// Choose a color using the biasing heuristic
+OptoReg::Name PhaseChaitin::bias_color( LRG &lrg, int chunk ) {
+
+ // Check for "at_risk" LRG's
+ uint risk_lrg = Find(lrg._risk_bias);
+ if( risk_lrg != 0 ) {
+ // Walk the colored neighbors of the "at_risk" candidate
+ // Choose a color which is both legal and already taken by a neighbor
+ // of the "at_risk" candidate, to improve the chances that the
+ // "at_risk" candidate itself will color
+ IndexSetIterator elements(_ifg->neighbors(risk_lrg));
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ OptoReg::Name reg = lrgs(datum).reg();
+ // If this LRG's register is legal for us, choose it
+ if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
+ lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
+ (lrg.num_regs()==1 || // either size 1
+ (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs)
+ return reg;
+ }
+ }
+
+ uint copy_lrg = Find(lrg._copy_bias);
+ if( copy_lrg != 0 ) {
+ // If he has a color,
+ if( !(*(_ifg->_yanked))[copy_lrg] ) {
+ OptoReg::Name reg = lrgs(copy_lrg).reg();
+ // And it is legal for you,
+ if( reg >= chunk && reg < chunk + RegMask::CHUNK_SIZE &&
+ lrg.mask().Member(OptoReg::add(reg,-chunk)) &&
+ (lrg.num_regs()==1 || // either size 1
+ (reg&1) == 1) ) // or aligned (adjacent reg is available since we already cleared-to-pairs)
+ return reg;
+ } else if( chunk == 0 ) {
+ // Choose a color which is legal for him
+ RegMask tempmask = lrg.mask();
+ tempmask.AND(lrgs(copy_lrg).mask());
+ OptoReg::Name reg;
+ if( lrg.num_regs() == 1 ) {
+ reg = tempmask.find_first_elem();
+ } else {
+ tempmask.ClearToPairs();
+ reg = tempmask.find_first_pair();
+ }
+ if( OptoReg::is_valid(reg) )
+ return reg;
+ }
+ }
+
+ // If no bias info exists, just go with the register selection ordering
+ if( lrg.num_regs() == 2 ) {
+ // Find an aligned pair
+ return OptoReg::add(lrg.mask().find_first_pair(),chunk);
+ }
+
+ // CNC - Fun hack. Alternate 1st and 2nd selection. Enables post-allocate
+ // copy removal to remove many more copies, by preventing a just-assigned
+ // register from being repeatedly assigned.
+ OptoReg::Name reg = lrg.mask().find_first_elem();
+ if( (++_alternate & 1) && OptoReg::is_valid(reg) ) {
+ // This 'Remove; find; Insert' idiom is an expensive way to find the
+ // SECOND element in the mask.
+ lrg.Remove(reg);
+ OptoReg::Name reg2 = lrg.mask().find_first_elem();
+ lrg.Insert(reg);
+ if( OptoReg::is_reg(reg2))
+ reg = reg2;
+ }
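+ // The mask is kept in chunk-0 numbering; bias the result by 'chunk' so
+ // the caller gets a register number in the current chunk (Select()
+ // later folds it back into normal space).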
+ return OptoReg::add( reg, chunk );
+}
+
+//------------------------------choose_color-----------------------------------
+// Choose a color in the current chunk
+OptoReg::Name PhaseChaitin::choose_color( LRG &lrg, int chunk ) {
+ assert( C->in_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP-1)), "must not allocate stack0 (inside preserve area)");
+ assert(C->out_preserve_stack_slots() == 0 || chunk != 0 || lrg._is_bound || lrg.mask().is_bound1() || !lrg.mask().Member(OptoReg::Name(_matcher._old_SP+0)), "must not allocate stack0 (inside preserve area)");
+
+ if( lrg.num_regs() == 1 || // Common Case
+ !lrg._fat_proj ) // Aligned+adjacent pairs ok
+ // Use a heuristic to "bias" the color choice
+ return bias_color(lrg, chunk);
+
+ assert( lrg.num_regs() >= 2, "dead live ranges do not color" );
+
+ // Fat-proj case or misaligned double argument.
+ assert(lrg.compute_mask_size() == lrg.num_regs() ||
+ lrg.num_regs() == 2,"fat projs exactly color" );
+ assert( !chunk, "always color in 1st chunk" );
+ // Return the highest element in the set.
+ return lrg.mask().find_last_elem();
+}
+
+//------------------------------Select-----------------------------------------
+// Select colors by re-inserting LRGs back into the IFG. LRGs are re-inserted
+// in reverse order of removal. As long as nothing of hi-degree was yanked,
+// everything going back is guaranteed a color. Select that color. If some
+// hi-degree LRG cannot get a color then we record that we must spill.
+uint PhaseChaitin::Select( ) {
+ uint spill_reg = LRG::SPILL_REG;
+ _max_reg = OptoReg::Name(0); // Past max register used
+ while( _simplified ) {
+ // Pull next LRG from the simplified list - in reverse order of removal
+ uint lidx = _simplified;
+ LRG *lrg = &lrgs(lidx);
+ _simplified = lrg->_next;
+
+
+#ifndef PRODUCT
+ if (trace_spilling()) {
+ ttyLocker ttyl;
+ tty->print_cr("L%d selecting degree %d degrees_of_freedom %d", lidx, lrg->degree(),
+ lrg->degrees_of_freedom());
+ lrg->dump();
+ }
+#endif
+
+ // Re-insert into the IFG
+ _ifg->re_insert(lidx);
+ if( !lrg->alive() ) continue;
+ // capture allstackedness flag before mask is hacked
+ const int is_allstack = lrg->mask().is_AllStack();
+
+ // Yeah, yeah, yeah, I know, I know. I can refactor this
+ // to avoid the GOTO, although the refactored code will not
+ // be much clearer. We arrive here IFF we have a stack-based
+ // live range that cannot color in the current chunk, and it
+ // has to move into the next free stack chunk.
+ int chunk = 0; // Current chunk is first chunk
+ retry_next_chunk:
+
+ // Remove neighbor colors
+ IndexSet *s = _ifg->neighbors(lidx);
+
+ debug_only(RegMask orig_mask = lrg->mask();)
+ IndexSetIterator elements(s);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ // Note that neighbor might be a spill_reg. In this case, exclusion
+ // of its color will be a no-op, since the spill_reg chunk is in outer
+ // space. Also, if neighbor is in a different chunk, this exclusion
+ // will be a no-op. (Later on, if lrg runs out of possible colors in
+ // its chunk, a new chunk of color may be tried, in which case
+ // examination of neighbors is started again, at retry_next_chunk.)
+ LRG &nlrg = lrgs(neighbor);
+ OptoReg::Name nreg = nlrg.reg();
+ // Only subtract masks in the same chunk
+ if( nreg >= chunk && nreg < chunk + RegMask::CHUNK_SIZE ) {
+#ifndef PRODUCT
+ uint size = lrg->mask().Size();
+ RegMask rm = lrg->mask();
+#endif
+ lrg->SUBTRACT(nlrg.mask());
+#ifndef PRODUCT
+ if (trace_spilling() && lrg->mask().Size() != size) {
+ ttyLocker ttyl;
+ tty->print("L%d ", lidx);
+ rm.dump();
+ tty->print(" intersected L%d ", neighbor);
+ nlrg.mask().dump();
+ tty->print(" removed ");
+ rm.SUBTRACT(lrg->mask());
+ rm.dump();
+ tty->print(" leaving ");
+ lrg->mask().dump();
+ tty->cr();
+ }
+#endif
+ }
+ }
+ //assert(is_allstack == lrg->mask().is_AllStack(), "nbrs must not change AllStackedness");
+ // Aligned pairs need aligned masks
+ if( lrg->num_regs() == 2 && !lrg->_fat_proj )
+ lrg->ClearToPairs();
+
+ // Check if a color is available and if so pick the color
+ OptoReg::Name reg = choose_color( *lrg, chunk );
+#ifdef SPARC
+ debug_only(lrg->compute_set_mask_size());
+ assert(lrg->num_regs() != 2 || lrg->is_bound() || is_even(reg-1), "allocate all doubles aligned");
+#endif
+
+ //---------------
+ // If we fail to color and the AllStack flag is set, trigger
+ // a chunk-rollover event
+ if(!OptoReg::is_valid(OptoReg::add(reg,-chunk)) && is_allstack) {
+ // Bump register mask up to next stack chunk
+ chunk += RegMask::CHUNK_SIZE;
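+ // Reset the mask to the full register/stack set; the neighbor colors
+ // belonging to the new chunk are subtracted again after the goto.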
+ lrg->Set_All();
+
+ goto retry_next_chunk;
+ }
+
+ //---------------
+ // Did we get a color?
+ else if( OptoReg::is_valid(reg)) {
+#ifndef PRODUCT
+ RegMask avail_rm = lrg->mask();
+#endif
+
+ // Record selected register
+ lrg->set_reg(reg);
+
+ if( reg >= _max_reg ) // Compute max register limit
+ _max_reg = OptoReg::add(reg,1);
+ // Fold reg back into normal space
+ reg = OptoReg::add(reg,-chunk);
+
+ // If the live range is not bound, then we actually had some choices
+ // to make. In this case, the mask has more bits in it than the colors
+ // chosen. Restrict the mask to just what was picked.
+ if( lrg->num_regs() == 1 ) { // Size 1 live range
+ lrg->Clear(); // Clear the mask
+ lrg->Insert(reg); // Set regmask to match selected reg
+ lrg->set_mask_size(1);
+ } else if( !lrg->_fat_proj ) {
+ // For pairs, also insert the low bit of the pair
+ assert( lrg->num_regs() == 2, "unbound fatproj???" );
+ lrg->Clear(); // Clear the mask
+ lrg->Insert(reg); // Set regmask to match selected reg
+ lrg->Insert(OptoReg::add(reg,-1));
+ lrg->set_mask_size(2);
+ } else { // Else fatproj
+ // mask must be equal to fatproj bits, by definition
+ }
+#ifndef PRODUCT
+ if (trace_spilling()) {
+ ttyLocker ttyl;
+ tty->print("L%d selected ", lidx);
+ lrg->mask().dump();
+ tty->print(" from ");
+ avail_rm.dump();
+ tty->cr();
+ }
+#endif
+ // Note that reg is the highest-numbered register in the newly-bound mask.
+ } // end color available case
+
+ //---------------
+ // Live range is live and no colors available
+ else {
+ assert( lrg->alive(), "" );
+ assert( !lrg->_fat_proj || lrg->_def == NodeSentinel ||
+ lrg->_def->outcnt() > 0, "fat_proj cannot spill");
+ assert( !orig_mask.is_AllStack(), "All Stack does not spill" );
+
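+ // Each spilled live range receives a distinct pseudo-register number
+ // at or above LRG::SPILL_REG; the count of these assignments is what
+ // Select() returns below.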
+ // Assign the special spillreg register
+ lrg->set_reg(OptoReg::Name(spill_reg++));
+ // Do not empty the regmask; leave mask_size lying around
+ // for use during Spilling
+#ifndef PRODUCT
+ if( trace_spilling() ) {
+ ttyLocker ttyl;
+ tty->print("L%d spilling with neighbors: ", lidx);
+ s->dump();
+ debug_only(tty->print(" original mask: "));
+ debug_only(orig_mask.dump());
+ dump_lrg(lidx);
+ }
+#endif
+ } // end spill case
+
+ }
+
+ return spill_reg-LRG::SPILL_REG; // Return number of spills
+}
+
+
+//------------------------------copy_was_spilled-------------------------------
+// Copy 'was_spilled'-edness from the source Node to the dst Node.
+void PhaseChaitin::copy_was_spilled( Node *src, Node *dst ) {
+ if( _spilled_once.test(src->_idx) ) {
+ _spilled_once.set(dst->_idx);
+ lrgs(Find(dst))._was_spilled1 = 1;
+ if( _spilled_twice.test(src->_idx) ) {
+ _spilled_twice.set(dst->_idx);
+ lrgs(Find(dst))._was_spilled2 = 1;
+ }
+ }
+}
+
+//------------------------------set_was_spilled--------------------------------
+// Set the 'spilled_once' or 'spilled_twice' flag on a node.
+void PhaseChaitin::set_was_spilled( Node *n ) {
+ if( _spilled_once.test_set(n->_idx) )
+ _spilled_twice.set(n->_idx);
+}
+
+//------------------------------fixup_spills-----------------------------------
+// Convert Ideal spill instructions into proper FramePtr + offset Loads and
+// Stores. Use-def chains are NOT preserved, but Node->LRG->reg maps are.
+void PhaseChaitin::fixup_spills() {
+ // This function does only cisc spill work.
+ if( !UseCISCSpill ) return;
+
+ NOT_PRODUCT( Compile::TracePhase t3("fixupSpills", &_t_fixupSpills, TimeCompiler); )
+
+ // Grab the Frame Pointer
+ Node *fp = _cfg._broot->head()->in(1)->in(TypeFunc::FramePtr);
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+
+ // For all instructions in block
+ uint last_inst = b->end_idx();
+ for( uint j = 1; j <= last_inst; j++ ) {
+ Node *n = b->_nodes[j];
+
+ // Dead instruction???
+ assert( n->outcnt() != 0 ||// Nothing dead after post alloc
+ C->top() == n || // Or the random TOP node
+ n->is_Proj(), // Or a fat-proj kill node
+ "No dead instructions after post-alloc" );
+
+ int inp = n->cisc_operand();
+ if( inp != AdlcVMDeps::Not_cisc_spillable ) {
+ // Convert operand number to edge index number
+ MachNode *mach = n->as_Mach();
+ inp = mach->operand_index(inp);
+ Node *src = n->in(inp); // Value to load or store
+ LRG &lrg_cisc = lrgs( Find_const(src) );
+ OptoReg::Name src_reg = lrg_cisc.reg();
+ // Doubles record the HIGH register of an adjacent pair.
+ src_reg = OptoReg::add(src_reg,1-lrg_cisc.num_regs());
+ if( OptoReg::is_stack(src_reg) ) { // If input is on stack
+ // This is a CISC Spill, get stack offset and construct new node
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" reg-instr: ");
+ n->dump();
+ }
+#endif
+ int stk_offset = reg2offset(src_reg);
+ // Bailout if we might exceed node limit when spilling this instruction
+ C->check_node_count(0, "out of nodes fixing spills");
+ if (C->failing()) return;
+ // Transform node
+ MachNode *cisc = mach->cisc_version(stk_offset, C)->as_Mach();
+ cisc->set_req(inp,fp); // Base register is frame pointer
+ if( cisc->oper_input_base() > 1 && mach->oper_input_base() <= 1 ) {
+ assert( cisc->oper_input_base() == 2, "Only adding one edge");
+ cisc->ins_req(1,src); // Requires a memory edge
+ }
+ b->_nodes.map(j,cisc); // Insert into basic block
+ n->replace_by(cisc); // Correct graph
+ //
+ ++_used_cisc_instructions;
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" cisc-instr: ");
+ cisc->dump();
+ }
+#endif
+ } else {
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" using reg-instr: ");
+ n->dump();
+ }
+#endif
+ ++_unused_cisc_instructions; // input can be on stack
+ }
+ }
+
+ } // End of for all instructions
+
+ } // End of for all blocks
+}
+
+//------------------------------find_base_for_derived--------------------------
+// Helper to stretch above; recursively discover the base Node for a
+// given derived Node. Easy for AddP-related machine nodes, but needs
+// to be recursive for derived Phis.
+Node *PhaseChaitin::find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg ) {
+ // See if already computed; if so return it
+ if( derived_base_map[derived->_idx] )
+ return derived_base_map[derived->_idx];
+
+ // See if this happens to be a base.
+ // NOTE: we use TypePtr instead of TypeOopPtr because we can have
+ // pointers derived from NULL! These are always along paths that
+ // can't happen at run-time but the optimizer cannot deduce it so
+ // we have to handle it gracefully.
+ const TypePtr *tj = derived->bottom_type()->isa_ptr();
+ // If it's an OOP with a non-zero offset, then it is derived.
+ if( tj->_offset == 0 ) {
+ derived_base_map[derived->_idx] = derived;
+ return derived;
+ }
+ // Derived is NULL+offset? Base is NULL!
+ if( derived->is_Con() ) {
+ Node *base = new (C, 1) ConPNode( TypePtr::NULL_PTR );
+ uint no_lidx = 0; // an unmatched constant in debug info has no LRG
+ _names.extend(base->_idx, no_lidx);
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Check for AddP-related opcodes
+ if( !derived->is_Phi() ) {
+ assert( derived->as_Mach()->ideal_Opcode() == Op_AddP, "" );
+ Node *base = derived->in(AddPNode::Base);
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Recursively find bases for Phis.
+ // First check to see if we can avoid a base Phi here.
+ Node *base = find_base_for_derived( derived_base_map, derived->in(1),maxlrg);
+ uint i;
+ for( i = 2; i < derived->req(); i++ )
+ if( base != find_base_for_derived( derived_base_map,derived->in(i),maxlrg))
+ break;
+ // Went to the end without finding any different bases?
+ if( i == derived->req() ) { // No need for a base Phi here
+ derived_base_map[derived->_idx] = base;
+ return base;
+ }
+
+ // Now we see we need a base-Phi here to merge the bases
+ base = new (C, derived->req()) PhiNode( derived->in(0), base->bottom_type() );
+ for( i = 1; i < derived->req(); i++ )
+ base->init_req(i, find_base_for_derived(derived_base_map, derived->in(i), maxlrg));
+
+ // Search the current block for an existing base-Phi
+ Block *b = _cfg._bbs[derived->_idx];
+ for( i = 1; i <= b->end_idx(); i++ ) {// Search for matching Phi
+ Node *phi = b->_nodes[i];
+ if( !phi->is_Phi() ) { // Found end of Phis with no match?
+ b->_nodes.insert( i, base ); // Must insert created Phi here as base
+ _cfg._bbs.map( base->_idx, b );
+ new_lrg(base,maxlrg++);
+ break;
+ }
+ // See if Phi matches.
+ uint j;
+ for( j = 1; j < base->req(); j++ )
+ if( phi->in(j) != base->in(j) &&
+ !(phi->in(j)->is_Con() && base->in(j)->is_Con()) ) // allow different NULLs
+ break;
+ if( j == base->req() ) { // All inputs match?
+ base = phi; // Then use existing 'phi' and drop 'base'
+ break;
+ }
+ }
+
+
+ // Cache info for later passes
+ derived_base_map[derived->_idx] = base;
+ return base;
+}
+
+
+//------------------------------stretch_base_pointer_live_ranges---------------
+// At each Safepoint, insert extra debug edges for each pair of derived value/
+// base pointer that is live across the Safepoint for oopmap building. The
+// edge pairs get added in after sfpt->jvmtail()->oopoff(), but are in the
+// required edge set.
+bool PhaseChaitin::stretch_base_pointer_live_ranges( ResourceArea *a ) {
+ int must_recompute_live = false;
+ uint maxlrg = _maxlrg;
+ Node **derived_base_map = (Node**)a->Amalloc(sizeof(Node*)*C->unique());
+ memset( derived_base_map, 0, sizeof(Node*)*C->unique() );
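+ // derived_base_map caches, per Node index, the base discovered for a
+ // derived pointer so the recursive walk in find_base_for_derived() is
+ // done at most once per Node.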
+
+ // For all blocks in RPO do...
+ for( uint i=0; i<_cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ // Note use of deep-copy constructor. I cannot hammer the original
+ // liveout bits, because they are needed by the following coalesce pass.
+ IndexSet liveout(_live->live(b));
+
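+ // Walk the block bottom-up so that 'liveout' always holds the set of
+ // live ranges live just below the current instruction.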
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+
+ // Pre-split compares of loop-phis. Loop-phis form a cycle we would
+ // like to see in the same register. Compare uses the loop-phi and so
+ // extends its live range BUT cannot be part of the cycle. If this
+ // extended live range overlaps with the update of the loop-phi value
+ // we need both alive at the same time -- which requires at least 1
+ // copy. But because Intel has only 2-address instructions we end up with
+ // at least 2 copies, one before the loop-phi update instruction and
+ // one after. Instead we split the input to the compare just after the
+ // phi.
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_CmpI ) {
+ Node *phi = n->in(1);
+ if( phi->is_Phi() && phi->as_Phi()->region()->is_Loop() ) {
+ Block *phi_block = _cfg._bbs[phi->_idx];
+ if( _cfg._bbs[phi_block->pred(2)->_idx] == b ) {
+ const RegMask *mask = C->matcher()->idealreg2spillmask[Op_RegI];
+ Node *spill = new (C) MachSpillCopyNode( phi, *mask, *mask );
+ insert_proj( phi_block, 1, spill, maxlrg++ );
+ n->set_req(1,spill);
+ must_recompute_live = true;
+ }
+ }
+ }
+
+ // Get value being defined
+ uint lidx = n2lidx(n);
+ if( lidx && lidx < _maxlrg /* Ignore the occasional brand-new live range */) {
+ // Remove from live-out set
+ liveout.remove(lidx);
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) liveout.remove( n2lidx(n->in(idx)) );
+ }
+
+ // Found a safepoint?
+ JVMState *jvms = n->jvms();
+ if( jvms ) {
+ // Now scan for a live derived pointer
+ IndexSetIterator elements(&liveout);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ // Find reaching DEF for base and derived values
+ // This works because we are still in SSA during this call.
+ Node *derived = lrgs(neighbor)._def;
+ const TypePtr *tj = derived->bottom_type()->isa_ptr();
+ // If it's an OOP with a non-zero offset, then it is derived.
+ if( tj && tj->_offset != 0 && tj->isa_oop_ptr() ) {
+ Node *base = find_base_for_derived( derived_base_map, derived, maxlrg );
+ assert( base->_idx < _names.Size(), "" );
+ // Add reaching DEFs of derived pointer and base pointer as a
+ // pair of inputs
+ n->add_req( derived );
+ n->add_req( base );
+
+ // See if the base pointer is already live to this point.
+ // Since I'm working on the SSA form, live-ness amounts to
+ // reaching def's. So if I find the base's live range then
+ // I know the base's def reaches here.
+ if( (n2lidx(base) >= _maxlrg ||// (Brand new base (hence not live) or
+ !liveout.member( n2lidx(base) ) ) && // not live) AND
+ (n2lidx(base) > 0) && // not a constant
+ _cfg._bbs[base->_idx] != b ) { // base not def'd in blk)
+ // Base pointer is not currently live. Since I stretched
+ // the base pointer to here and it crosses basic-block
+ // boundaries, the global live info is now incorrect.
+ // Recompute live.
+ must_recompute_live = true;
+ } // End of if base pointer is not live to debug info
+ }
+ } // End of scan all live data for derived ptrs crossing GC point
+ } // End of if found a GC point
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ for( uint k = 1; k < n->req(); k++ ) {
+ uint lidx = n2lidx(n->in(k));
+ if( lidx < _maxlrg )
+ liveout.insert( lidx );
+ }
+ }
+
+ } // End of forall instructions in block
+ liveout.clear(); // Free the memory used by liveout.
+
+ } // End of forall blocks
+ _maxlrg = maxlrg;
+
+ // If I created a new live range I need to recompute live
+ if( maxlrg != _ifg->_maxlrg )
+ must_recompute_live = true;
+
+ return must_recompute_live != 0;
+}
+
+
+//------------------------------add_reference----------------------------------
+// Extend the node to LRG mapping
+void PhaseChaitin::add_reference( const Node *node, const Node *old_node ) {
+ _names.extend( node->_idx, n2lidx(old_node) );
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseChaitin::dump( const Node *n ) const {
+ uint r = (n->_idx < _names.Size() ) ? Find_const(n) : 0;
+ tty->print("L%d",r);
+ if( r && n->Opcode() != Op_Phi ) {
+ if( _node_regs ) { // Got a post-allocation copy of allocation?
+ tty->print("[");
+ OptoReg::Name second = get_reg_second(n);
+ if( OptoReg::is_valid(second) ) {
+ if( OptoReg::is_reg(second) )
+ tty->print("%s:",Matcher::regName[second]);
+ else
+ tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(second));
+ }
+ OptoReg::Name first = get_reg_first(n);
+ if( OptoReg::is_reg(first) )
+ tty->print("%s]",Matcher::regName[first]);
+ else
+ tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer), reg2offset_unchecked(first));
+ } else
+ n->out_RegMask().dump();
+ }
+ tty->print("/N%d\t",n->_idx);
+ tty->print("%s === ", n->Name());
+ uint k;
+ for( k = 0; k < n->req(); k++) {
+ Node *m = n->in(k);
+ if( !m ) tty->print("_ ");
+ else {
+ uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+ tty->print("L%d",r);
+ // Data MultiNodes can have projections with no real registers.
+ // Don't die while dumping them.
+ int op = n->Opcode();
+ if( r && op != Op_Phi && op != Op_Proj && op != Op_SCMemProj) {
+ if( _node_regs ) {
+ tty->print("[");
+ OptoReg::Name second = get_reg_second(n->in(k));
+ if( OptoReg::is_valid(second) ) {
+ if( OptoReg::is_reg(second) )
+ tty->print("%s:",Matcher::regName[second]);
+ else
+ tty->print("%s+%d:",OptoReg::regname(OptoReg::c_frame_pointer),
+ reg2offset_unchecked(second));
+ }
+ OptoReg::Name first = get_reg_first(n->in(k));
+ if( OptoReg::is_reg(first) )
+ tty->print("%s]",Matcher::regName[first]);
+ else
+ tty->print("%s+%d]",OptoReg::regname(OptoReg::c_frame_pointer),
+ reg2offset_unchecked(first));
+ } else
+ n->in_RegMask(k).dump();
+ }
+ tty->print("/N%d ",m->_idx);
+ }
+ }
+ if( k < n->len() && n->in(k) ) tty->print("| ");
+ for( ; k < n->len(); k++ ) {
+ Node *m = n->in(k);
+ if( !m ) break;
+ uint r = (m->_idx < _names.Size() ) ? Find_const(m) : 0;
+ tty->print("L%d",r);
+ tty->print("/N%d ",m->_idx);
+ }
+ if( n->is_Mach() ) n->as_Mach()->dump_spec(tty);
+ else n->dump_spec(tty);
+ if( _spilled_once.test(n->_idx ) ) {
+ tty->print(" Spill_1");
+ if( _spilled_twice.test(n->_idx ) )
+ tty->print(" Spill_2");
+ }
+ tty->print("\n");
+}
+
+void PhaseChaitin::dump( const Block * b ) const {
+ b->dump_head( &_cfg._bbs );
+
+ // For all instructions
+ for( uint j = 0; j < b->_nodes.size(); j++ )
+ dump(b->_nodes[j]);
+ // Print live-out info at end of block
+ if( _live ) {
+ tty->print("Liveout: ");
+ IndexSet *live = _live->live(b);
+ IndexSetIterator elements(live);
+ tty->print("{");
+ uint i;
+ while ((i = elements.next()) != 0) {
+ tty->print("L%d ", Find_const(i));
+ }
+ tty->print_cr("}");
+ }
+ tty->print("\n");
+}
+
+void PhaseChaitin::dump() const {
+ tty->print( "--- Chaitin -- argsize: %d framesize: %d ---\n",
+ _matcher._new_SP, _framesize );
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ )
+ dump(_cfg._blocks[i]);
+ // End of per-block dump
+ tty->print("\n");
+
+ if (!_ifg) {
+ tty->print("(No IFG.)\n");
+ return;
+ }
+
+ // Dump LRG array
+ tty->print("--- Live RanGe Array ---\n");
+ for(uint i2 = 1; i2 < _maxlrg; i2++ ) {
+ tty->print("L%d: ",i2);
+ if( i2 < _ifg->_maxlrg ) lrgs(i2).dump( );
+ else tty->print("new LRG");
+ }
+ tty->print_cr("");
+
+ // Dump lo-degree list
+ tty->print("Lo degree: ");
+ for(uint i3 = _lo_degree; i3; i3 = lrgs(i3)._next )
+ tty->print("L%d ",i3);
+ tty->print_cr("");
+
+ // Dump lo-stk-degree list
+ tty->print("Lo stk degree: ");
+ for(uint i4 = _lo_stk_degree; i4; i4 = lrgs(i4)._next )
+ tty->print("L%d ",i4);
+ tty->print_cr("");
+
+ // Dump hi-degree list
+ tty->print("Hi degree: ");
+ for(uint i5 = _hi_degree; i5; i5 = lrgs(i5)._next )
+ tty->print("L%d ",i5);
+ tty->print_cr("");
+}
+
+//------------------------------dump_degree_lists------------------------------
+void PhaseChaitin::dump_degree_lists() const {
+ // Dump lo-degree list
+ tty->print("Lo degree: ");
+ for( uint i = _lo_degree; i; i = lrgs(i)._next )
+ tty->print("L%d ",i);
+ tty->print_cr("");
+
+ // Dump lo-stk-degree list
+ tty->print("Lo stk degree: ");
+ for(uint i2 = _lo_stk_degree; i2; i2 = lrgs(i2)._next )
+ tty->print("L%d ",i2);
+ tty->print_cr("");
+
+ // Dump hi-degree list
+ tty->print("Hi degree: ");
+ for(uint i3 = _hi_degree; i3; i3 = lrgs(i3)._next )
+ tty->print("L%d ",i3);
+ tty->print_cr("");
+}
+
+//------------------------------dump_simplified--------------------------------
+void PhaseChaitin::dump_simplified() const {
+ tty->print("Simplified: ");
+ for( uint i = _simplified; i; i = lrgs(i)._next )
+ tty->print("L%d ",i);
+ tty->print_cr("");
+}
+
+static char *print_reg( OptoReg::Name reg, const PhaseChaitin *pc, char *buf ) {
+ if ((int)reg < 0)
+ sprintf(buf, "<OptoReg::%d>", (int)reg);
+ else if (OptoReg::is_reg(reg))
+ strcpy(buf, Matcher::regName[reg]);
+ else
+ sprintf(buf,"%s + #%d",OptoReg::regname(OptoReg::c_frame_pointer),
+ pc->reg2offset(reg));
+ return buf+strlen(buf);
+}
+
+//------------------------------dump_register----------------------------------
+// Dump a register name into a buffer. Be intelligent if we get called
+// before allocation is complete.
+char *PhaseChaitin::dump_register( const Node *n, char *buf ) const {
+ if( !this ) { // Not got anything?
+ sprintf(buf,"N%d",n->_idx); // Then use Node index
+ } else if( _node_regs ) {
+ // Post allocation, use direct mappings, no LRG info available
+ print_reg( get_reg_first(n), this, buf );
+ } else {
+ uint lidx = Find_const(n); // Grab LRG number
+ if( !_ifg ) {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else if( !lidx ) { // Special, not allocated value
+ strcpy(buf,"Special");
+ } else if( (lrgs(lidx).num_regs() == 1)
+ ? !lrgs(lidx).mask().is_bound1()
+ : !lrgs(lidx).mask().is_bound2() ) {
+ sprintf(buf,"L%d",lidx); // No register binding yet
+ } else { // Hah! We have a bound machine register
+ print_reg( lrgs(lidx).reg(), this, buf );
+ }
+ }
+ return buf+strlen(buf);
+}
+
+//----------------------dump_for_spill_split_recycle--------------------------
+void PhaseChaitin::dump_for_spill_split_recycle() const {
+ if( WizardMode && (PrintCompilation || PrintOpto) ) {
+ // Display which live ranges need to be split and the allocator's state
+ tty->print_cr("Graph-Coloring Iteration %d will split the following live ranges", _trip_cnt);
+ for( uint bidx = 1; bidx < _maxlrg; bidx++ ) {
+ if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
+ tty->print("L%d: ", bidx);
+ lrgs(bidx).dump();
+ }
+ }
+ tty->cr();
+ dump();
+ }
+}
+
+//------------------------------dump_frame------------------------------------
+void PhaseChaitin::dump_frame() const {
+ const char *fp = OptoReg::regname(OptoReg::c_frame_pointer);
+ const TypeTuple *domain = C->tf()->domain();
+ const int argcnt = domain->cnt() - TypeFunc::Parms;
+
+ // Incoming arguments in registers dump
+ for( int k = 0; k < argcnt; k++ ) {
+ OptoReg::Name parmreg = _matcher._parm_regs[k].first();
+ if( OptoReg::is_reg(parmreg)) {
+ const char *reg_name = OptoReg::regname(parmreg);
+ tty->print("#r%3.3d %s", parmreg, reg_name);
+ parmreg = _matcher._parm_regs[k].second();
+ if( OptoReg::is_reg(parmreg)) {
+ tty->print(":%s", OptoReg::regname(parmreg));
+ }
+ tty->print(" : parm %d: ", k);
+ domain->field_at(k + TypeFunc::Parms)->dump();
+ tty->print_cr("");
+ }
+ }
+
+ // Check for un-owned padding above incoming args
+ OptoReg::Name reg = _matcher._new_SP;
+ if( reg > _matcher._in_arg_limit ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: pad0, owned by CALLER", reg, fp, reg2offset_unchecked(reg));
+ }
+
+ // Incoming argument area dump
+ OptoReg::Name begin_in_arg = OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots());
+ while( reg > begin_in_arg ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
+ int j;
+ for( j = 0; j < argcnt; j++) {
+ if( _matcher._parm_regs[j].first() == reg ||
+ _matcher._parm_regs[j].second() == reg ) {
+ tty->print("parm %d: ",j);
+ domain->field_at(j + TypeFunc::Parms)->dump();
+ tty->print_cr("");
+ break;
+ }
+ }
+ if( j >= argcnt )
+ tty->print_cr("HOLE, owned by SELF");
+ }
+
+ // Old outgoing preserve area
+ while( reg > _matcher._old_SP ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: old out preserve",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Old SP
+ tty->print_cr("# -- Old %s -- Framesize: %d --",fp,
+ reg2offset_unchecked(OptoReg::add(_matcher._old_SP,-1)) - reg2offset_unchecked(_matcher._new_SP)+jintSize);
+
+ // Preserve area dump
+ reg = OptoReg::add(reg, -1);
+ while( OptoReg::is_stack(reg)) {
+ tty->print("#r%3.3d %s+%2d: ",reg,fp,reg2offset_unchecked(reg));
+ if( _matcher.return_addr() == reg )
+ tty->print_cr("return address");
+ else if( _matcher.return_addr() == OptoReg::add(reg,1) &&
+ VerifyStackAtCalls )
+ tty->print_cr("0xBADB100D +VerifyStackAtCalls");
+ else if ((int)OptoReg::reg2stack(reg) < C->fixed_slots())
+ tty->print_cr("Fixed slot %d", OptoReg::reg2stack(reg));
+ else
+ tty->print_cr("pad2, in_preserve");
+ reg = OptoReg::add(reg, -1);
+ }
+
+ // Spill area dump
+ reg = OptoReg::add(_matcher._new_SP, _framesize );
+ while( reg > _matcher._out_arg_limit ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: spill",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Outgoing argument area dump
+ while( reg > OptoReg::add(_matcher._new_SP, C->out_preserve_stack_slots()) ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: outgoing argument",reg,fp,reg2offset_unchecked(reg));
+ }
+
+ // Outgoing new preserve area
+ while( reg > _matcher._new_SP ) {
+ reg = OptoReg::add(reg, -1);
+ tty->print_cr("#r%3.3d %s+%2d: new out preserve",reg,fp,reg2offset_unchecked(reg));
+ }
+ tty->print_cr("#");
+}
+
+//------------------------------dump_bb----------------------------------------
+void PhaseChaitin::dump_bb( uint pre_order ) const {
+ tty->print_cr("---dump of B%d---",pre_order);
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ if( b->_pre_order == pre_order )
+ dump(b);
+ }
+}
+
+//------------------------------dump_lrg---------------------------------------
+void PhaseChaitin::dump_lrg( uint lidx ) const {
+ tty->print_cr("---dump of L%d---",lidx);
+
+ if( _ifg ) {
+ if( lidx >= _maxlrg ) {
+ tty->print("Attempt to print live range index beyond max live range.\n");
+ return;
+ }
+ tty->print("L%d: ",lidx);
+ lrgs(lidx).dump( );
+ }
+ if( _ifg ) { tty->print("Neighbors: %d - ", _ifg->neighbor_cnt(lidx));
+ _ifg->neighbors(lidx)->dump();
+ tty->cr();
+ }
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ int dump_once = 0;
+
+ // For all instructions
+ for( uint j = 0; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+ if( Find_const(n) == lidx ) {
+ if( !dump_once++ ) {
+ tty->cr();
+ b->dump_head( &_cfg._bbs );
+ }
+ dump(n);
+ continue;
+ }
+ uint cnt = n->req();
+ for( uint k = 1; k < cnt; k++ ) {
+ Node *m = n->in(k);
+ if (!m) continue; // be robust in the dumper
+ if( Find_const(m) == lidx ) {
+ if( !dump_once++ ) {
+ tty->cr();
+ b->dump_head( &_cfg._bbs );
+ }
+ dump(n);
+ }
+ }
+ }
+ } // End of per-block dump
+ tty->cr();
+}
+#endif // not PRODUCT
+
+//------------------------------print_chaitin_statistics-------------------------------
+int PhaseChaitin::_final_loads = 0;
+int PhaseChaitin::_final_stores = 0;
+int PhaseChaitin::_final_memoves= 0;
+int PhaseChaitin::_final_copies = 0;
+double PhaseChaitin::_final_load_cost = 0;
+double PhaseChaitin::_final_store_cost = 0;
+double PhaseChaitin::_final_memove_cost= 0;
+double PhaseChaitin::_final_copy_cost = 0;
+int PhaseChaitin::_conserv_coalesce = 0;
+int PhaseChaitin::_conserv_coalesce_pair = 0;
+int PhaseChaitin::_conserv_coalesce_trie = 0;
+int PhaseChaitin::_conserv_coalesce_quad = 0;
+int PhaseChaitin::_post_alloc = 0;
+int PhaseChaitin::_lost_opp_pp_coalesce = 0;
+int PhaseChaitin::_lost_opp_cflow_coalesce = 0;
+int PhaseChaitin::_used_cisc_instructions = 0;
+int PhaseChaitin::_unused_cisc_instructions = 0;
+int PhaseChaitin::_allocator_attempts = 0;
+int PhaseChaitin::_allocator_successes = 0;
+
+#ifndef PRODUCT
+uint PhaseChaitin::_high_pressure = 0;
+uint PhaseChaitin::_low_pressure = 0;
+
+void PhaseChaitin::print_chaitin_statistics() {
+ tty->print_cr("Inserted %d spill loads, %d spill stores, %d mem-mem moves and %d copies.", _final_loads, _final_stores, _final_memoves, _final_copies);
+ tty->print_cr("Total load cost= %6.0f, store cost = %6.0f, mem-mem cost = %5.2f, copy cost = %5.0f.", _final_load_cost, _final_store_cost, _final_memove_cost, _final_copy_cost);
+ tty->print_cr("Adjusted spill cost = %7.0f.",
+ _final_load_cost*4.0 + _final_store_cost * 2.0 +
+ _final_copy_cost*1.0 + _final_memove_cost*12.0);
+ tty->print("Conservatively coalesced %d copies, %d pairs",
+ _conserv_coalesce, _conserv_coalesce_pair);
+ if( _conserv_coalesce_trie || _conserv_coalesce_quad )
+ tty->print(", %d tries, %d quads", _conserv_coalesce_trie, _conserv_coalesce_quad);
+ tty->print_cr(", %d post alloc.", _post_alloc);
+ if( _lost_opp_pp_coalesce || _lost_opp_cflow_coalesce )
+ tty->print_cr("Lost coalesce opportunity, %d private-private, and %d cflow interfered.",
+ _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce );
+ if( _used_cisc_instructions || _unused_cisc_instructions )
+ tty->print_cr("Used cisc instruction %d, remained in register %d",
+ _used_cisc_instructions, _unused_cisc_instructions);
+ if( _allocator_successes != 0 )
+ tty->print_cr("Average allocation trips %f", (float)_allocator_attempts/(float)_allocator_successes);
+ tty->print_cr("High Pressure Blocks = %d, Low Pressure Blocks = %d", _high_pressure, _low_pressure);
+}
+#endif // not PRODUCT
diff --git a/src/share/vm/opto/chaitin.hpp b/src/share/vm/opto/chaitin.hpp
new file mode 100644
index 000000000..df848d72c
--- /dev/null
+++ b/src/share/vm/opto/chaitin.hpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class LoopTree;
+class MachCallNode;
+class MachSafePointNode;
+class Matcher;
+class PhaseCFG;
+class PhaseLive;
+class PhaseRegAlloc;
+class PhaseChaitin;
+
+#define OPTO_DEBUG_SPLIT_FREQ BLOCK_FREQUENCY(0.001)
+#define OPTO_LRG_HIGH_FREQ BLOCK_FREQUENCY(0.25)
+
+//------------------------------LRG--------------------------------------------
+// Live-RanGe structure.
+class LRG : public ResourceObj {
+public:
+ enum { SPILL_REG=29999 }; // Register number of a spilled LRG
+
+ double _cost; // 2 for loads/1 for stores times block freq
+ double _area; // Sum of all simultaneously live values
+ double score() const; // Compute score from cost and area
+ double _maxfreq; // Maximum frequency of any def or use
+
+ Node *_def; // Check for multi-def live ranges
+#ifndef PRODUCT
+ GrowableArray<Node*>* _defs;
+#endif
+
+ uint _risk_bias; // Index of LRG whose color we want to avoid
+ uint _copy_bias; // Index of LRG whose color we want to share
+
+ uint _next; // Index of next LRG in linked list
+ uint _prev; // Index of prev LRG in linked list
+private:
+ uint _reg; // Chosen register; undefined if mask is plural
+public:
+ // Return chosen register for this LRG. Error if the LRG is not bound to
+ // a single register.
+ OptoReg::Name reg() const { return OptoReg::Name(_reg); }
+ void set_reg( OptoReg::Name r ) { _reg = r; }
+
+private:
+ uint _eff_degree; // Effective degree: Sum of neighbors' _num_regs
+public:
+ int degree() const { assert( _degree_valid, "" ); return _eff_degree; }
+ // Degree starts not valid and any change to the IFG neighbor
+ // set makes it not valid.
+ void set_degree( uint degree ) { _eff_degree = degree; debug_only(_degree_valid = 1;) }
+ // Made a change that hammered degree
+ void invalid_degree() { debug_only(_degree_valid=0;) }
+ // Incrementally modify degree. If it was correct, it should remain correct
+ void inc_degree( uint mod ) { _eff_degree += mod; }
+ // Compute the degree between 2 live ranges
+ int compute_degree( LRG &l ) const;
+
+private:
+ RegMask _mask; // Allowed registers for this LRG
+ uint _mask_size; // cache of _mask.Size();
+public:
+ int compute_mask_size() const { return _mask.is_AllStack() ? 65535 : _mask.Size(); }
+ void set_mask_size( int size ) {
+ assert((size == 65535) || (size == (int)_mask.Size()), "");
+ _mask_size = size;
+ debug_only(_msize_valid=1;)
+ debug_only( if( _num_regs == 2 && !_fat_proj ) _mask.VerifyPairs(); )
+ }
+ void compute_set_mask_size() { set_mask_size(compute_mask_size()); }
+ int mask_size() const { assert( _msize_valid, "mask size not valid" );
+ return _mask_size; }
+ // Get the last mask size computed, even if it does not match the
+ // count of bits in the current mask.
+ int get_invalid_mask_size() const { return _mask_size; }
+ const RegMask &mask() const { return _mask; }
+ void set_mask( const RegMask &rm ) { _mask = rm; debug_only(_msize_valid=0;)}
+ void AND( const RegMask &rm ) { _mask.AND(rm); debug_only(_msize_valid=0;)}
+ void SUBTRACT( const RegMask &rm ) { _mask.SUBTRACT(rm); debug_only(_msize_valid=0;)}
+ void Clear() { _mask.Clear() ; debug_only(_msize_valid=1); _mask_size = 0; }
+ void Set_All() { _mask.Set_All(); debug_only(_msize_valid=1); _mask_size = RegMask::CHUNK_SIZE; }
+ void Insert( OptoReg::Name reg ) { _mask.Insert(reg); debug_only(_msize_valid=0;) }
+ void Remove( OptoReg::Name reg ) { _mask.Remove(reg); debug_only(_msize_valid=0;) }
+ void ClearToPairs() { _mask.ClearToPairs(); debug_only(_msize_valid=0;) }
+
+ // Number of registers this live range uses when it colors
+private:
+ uint8 _num_regs; // 2 for Longs and Doubles, 1 for all else
+ // except _num_regs is kill count for fat_proj
+public:
+ int num_regs() const { return _num_regs; }
+ void set_num_regs( int reg ) { assert( _num_regs == reg || !_num_regs, "" ); _num_regs = reg; }
+
+private:
+ // Number of physical registers this live range uses when it colors
+ // Architecture and register-set dependent
+ uint8 _reg_pressure;
+public:
+ void set_reg_pressure(int i) { _reg_pressure = i; }
+ int reg_pressure() const { return _reg_pressure; }
+
+ // How much 'wiggle room' does this live range have?
+ // How many color choices can it make (scaled by _num_regs)?
+ int degrees_of_freedom() const { return mask_size() - _num_regs; }
+ // Bound LRGs have ZERO degrees of freedom. We also count
+ // must_spill as bound.
+ bool is_bound () const { return _is_bound; }
+ // Negative degrees-of-freedom; even with no neighbors this
+ // live range must spill.
+ bool not_free() const { return degrees_of_freedom() < 0; }
+ // Is this live range of "low-degree"? Trivially colorable?
+ bool lo_degree () const { return degree() <= degrees_of_freedom(); }
+ // Is this live range just barely "low-degree"? Trivially colorable?
+ bool just_lo_degree () const { return degree() == degrees_of_freedom(); }
+
+ uint _is_oop:1, // Live-range holds an oop
+ _is_float:1, // True if in float registers
+ _was_spilled1:1, // True if prior spilling on def
+ _was_spilled2:1, // True if twice prior spilling on def
+ _is_bound:1, // live range starts life with no
+ // degrees of freedom.
+ _direct_conflict:1, // True if def and use registers in conflict
+ _must_spill:1, // live range has lost all degrees of freedom
+ // If _fat_proj is set, live range does NOT require aligned, adjacent
+ // registers and has NO interferences.
+ // If _fat_proj is clear, live range requires num_regs() to be a power of
+ // 2, and it requires registers to form an aligned, adjacent set.
+ _fat_proj:1, //
+ _was_lo:1, // Was lo-degree prior to coalesce
+ _msize_valid:1, // _mask_size cache valid
+ _degree_valid:1, // _degree cache valid
+ _has_copy:1, // Adjacent to some copy instruction
+ _at_risk:1; // Simplify says this guy is at risk to spill
+
+
+ // Alive if non-zero, dead if zero
+ bool alive() const { return _def != NULL; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+#endif
+};
+
+//------------------------------LRG_List---------------------------------------
+// Map Node indices to Live RanGe indices.
+// Array lookup in the optimized case.
+class LRG_List : public ResourceObj {
+ uint _cnt, _max;
+ uint* _lidxs;
+ ReallocMark _nesting; // assertion check for reallocations
+public:
+ LRG_List( uint max );
+
+ uint lookup( uint nidx ) const {
+ return _lidxs[nidx];
+ }
+ uint operator[] (uint nidx) const { return lookup(nidx); }
+
+ void map( uint nidx, uint lidx ) {
+ assert( nidx < _cnt, "oob" );
+ _lidxs[nidx] = lidx;
+ }
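+ // Like map(), but also grows the table when nidx is beyond the current
+ // size (hence the ReallocMark assertion check above).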
+ void extend( uint nidx, uint lidx );
+
+ uint Size() const { return _cnt; }
+};
+
+//------------------------------IFG--------------------------------------------
+// InterFerence Graph
+// An undirected graph implementation. Created with a fixed number of
+// vertices. Edges can be added & tested. Vertices can be removed, then
+// added back later with all edges intact. Can add edges between one vertex
+// and a list of other vertices. Can union vertices (and their edges)
+// together. The IFG needs to be really really fast, and also fairly
+// abstract! It needs abstraction so I can fiddle with the implementation to
+// get even more speed.
+class PhaseIFG : public Phase {
+ // Current implementation: a triangular adjacency list.
+
+ // Array of adjacency-lists, indexed by live-range number
+ IndexSet *_adjs;
+
+ // Assertion bit for proper use of Squaring
+ bool _is_square;
+
+ // Live range structure goes here
+ LRG *_lrgs; // Array of LRG structures
+
+public:
+ // Largest live-range number
+ uint _maxlrg;
+
+ Arena *_arena;
+
+ // Keep track of inserted and deleted Nodes
+ VectorSet *_yanked;
+
+ PhaseIFG( Arena *arena );
+ void init( uint maxlrg );
+
+ // Add edge between a and b. Returns true if actually added.
+ int add_edge( uint a, uint b );
+
+ // Add edge between a and everything in the vector
+ void add_vector( uint a, IndexSet *vec );
+
+ // Test for edge existence
+ int test_edge( uint a, uint b ) const;
+
+ // Square-up matrix for faster Union
+ void SquareUp();
+
+ // Return number of LRG neighbors
+ uint neighbor_cnt( uint a ) const { return _adjs[a].count(); }
+ // Union edges of b into a on Squared-up matrix
+ void Union( uint a, uint b );
+ // Test for edge in Squared-up matrix
+ int test_edge_sq( uint a, uint b ) const;
+ // Yank a Node and all connected edges from the IFG. Be prepared to
+ // re-insert the yanked Node in reverse order of yanking. Return a
+ // list of neighbors (edges) yanked.
+ IndexSet *remove_node( uint a );
+ // Reinsert a yanked Node
+ void re_insert( uint a );
+ // Return set of neighbors
+ IndexSet *neighbors( uint a ) const { return &_adjs[a]; }
+
+#ifndef PRODUCT
+ // Dump the IFG
+ void dump() const;
+ void stats() const;
+ void verify( const PhaseChaitin * ) const;
+#endif
+
+ //--------------- Live Range Accessors
+ LRG &lrgs(uint idx) const { assert(idx < _maxlrg, "oob"); return _lrgs[idx]; }
+
+ // Compute and set effective degree. Might be folded into SquareUp().
+ void Compute_Effective_Degree();
+
+ // Compute effective degree as the sum of neighbors' _sizes.
+ int effective_degree( uint lidx ) const;
+};
+
+// TEMPORARILY REPLACED WITH COMMAND LINE FLAG
+
+//// !!!!! Magic Constants need to move into ad file
+#ifdef SPARC
+//#define FLOAT_PRESSURE 30 /* SFLT_REG_mask.Size() - 1 */
+//#define INT_PRESSURE 23 /* NOTEMP_I_REG_mask.Size() - 1 */
+#define FLOAT_INCREMENT(regs) regs
+#else
+//#define FLOAT_PRESSURE 6
+//#define INT_PRESSURE 6
+#define FLOAT_INCREMENT(regs) 1
+#endif
+
+//------------------------------Chaitin----------------------------------------
+// Briggs-Chaitin style allocation, mostly.
+class PhaseChaitin : public PhaseRegAlloc {
+
+ int _trip_cnt;
+ int _alternate;
+
+ uint _maxlrg; // Max live range number
+ LRG &lrgs(uint idx) const { return _ifg->lrgs(idx); }
+ PhaseLive *_live; // Liveness, used in the interference graph
+ PhaseIFG *_ifg; // Interference graph (for original chunk)
+ Node_List **_lrg_nodes; // Array of node lists for LRGs which spill
+ VectorSet _spilled_once; // Nodes that have been spilled
+ VectorSet _spilled_twice; // Nodes that have been spilled twice
+
+ LRG_List _names; // Map from Nodes to Live RanGes
+
+ // Union-find map.
+ // Indexed by live-range number, it returns the compacted live-range number
+ LRG_List _uf_map;
+ // Reset the Union-Find map to identity
+ void reset_uf_map( uint maxlrg );
+ // Remove the need for the Union-Find mapping
+ void compress_uf_map_for_nodes( );
+
+ // Combine the Live Range Indices for these 2 Nodes into a single live
+ // range. Future requests for any Node in either live range will
+ // return the live range index for the combined live range.
+ void Union( const Node *src, const Node *dst );
+
+ void new_lrg( const Node *x, uint lrg );
+
+ // Compact live ranges, removing unused ones. Return new maxlrg.
+ void compact();
+
+ uint _lo_degree; // Head of lo-degree LRGs list
+ uint _lo_stk_degree; // Head of lo-stk-degree LRGs list
+ uint _hi_degree; // Head of hi-degree LRGs list
+ uint _simplified; // Linked list head of simplified LRGs
+
+ // Helper functions for Split()
+ uint split_DEF( Node *def, Block *b, int loc, uint max, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx );
+ uint split_USE( Node *def, Block *b, Node *use, uint useidx, uint max, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx );
+ int clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg );
+ Node *split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru );
+ // True if lidx is used before any real register is def'd in the block
+ bool prompt_use( Block *b, uint lidx );
+ Node *get_spillcopy_wide( Node *def, Node *use, uint uidx );
+ // Insert the spill at the chosen location. Skip over any intervening Projs or
+ // Phis. Skip over a CatchNode and projs, inserting in the fall-through block
+ // instead. Update high-pressure indices. Create a new live range.
+ void insert_proj( Block *b, uint i, Node *spill, uint maxlrg );
+
+ bool is_high_pressure( Block *b, LRG *lrg, uint insidx );
+
+ uint _oldphi; // Node index which separates pre-allocation nodes
+
+ Block **_blks; // Array of blocks sorted by frequency for coalescing
+
+#ifndef PRODUCT
+ bool _trace_spilling;
+#endif
+
+public:
+ PhaseChaitin( uint unique, PhaseCFG &cfg, Matcher &matcher );
+ ~PhaseChaitin() {}
+
+ // Convert a Node into a Live Range Index - a lidx
+ uint Find( const Node *n ) {
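+ // If the union-find entry already maps to itself the lidx is canonical;
+ // otherwise fall back to the path-compressing Find_compress().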
+ uint lidx = n2lidx(n);
+ uint uf_lidx = _uf_map[lidx];
+ return (uf_lidx == lidx) ? uf_lidx : Find_compress(n);
+ }
+ uint Find_const( uint lrg ) const;
+ uint Find_const( const Node *n ) const;
+
+ // Do all the real work of allocate
+ void Register_Allocate();
+
+ uint n2lidx( const Node *n ) const { return _names[n->_idx]; }
+
+#ifndef PRODUCT
+ bool trace_spilling() const { return _trace_spilling; }
+#endif
+
+private:
+ // De-SSA the world. Assign registers to Nodes. Use the same register for
+ // all inputs to a PhiNode, effectively coalescing live ranges. Insert
+ // copies as needed.
+ void de_ssa();
+ uint Find_compress( const Node *n );
+ uint Find( uint lidx ) {
+ uint uf_lidx = _uf_map[lidx];
+ return (uf_lidx == lidx) ? uf_lidx : Find_compress(lidx);
+ }
+ uint Find_compress( uint lidx );
+
+ uint Find_id( const Node *n ) {
+ uint retval = n2lidx(n);
+ assert(retval == Find(n),"Invalid node to lidx mapping");
+ return retval;
+ }
+
+ // Add edge between reg and everything in the vector.
+ // Same as _ifg->add_vector(reg,live) EXCEPT use the RegMask
+ // information to trim the set of interferences. Return the
+ // count of edges added.
+ void interfere_with_live( uint reg, IndexSet *live );
+ // Count register pressure for asserts
+ uint count_int_pressure( IndexSet *liveout );
+ uint count_float_pressure( IndexSet *liveout );
+
+ // Build the interference graph using virtual registers only.
+ // Used for aggressive coalescing.
+ void build_ifg_virtual( );
+
+ // Build the interference graph using physical registers when available.
+ // That is, if 2 live ranges are simultaneously alive but in their
+ // acceptable register sets do not overlap, then they do not interfere.
+ uint build_ifg_physical( ResourceArea *a );
+
+ // Gather LiveRanGe information, including register masks and base pointer/
+ // derived pointer relationships.
+ void gather_lrg_masks( bool mod_cisc_masks );
+
+ // Force the bases of derived pointers to be alive at GC points.
+ bool stretch_base_pointer_live_ranges( ResourceArea *a );
+ // Helper to stretch above; recursively discover the base Node for
+ // a given derived Node. Easy for AddP-related machine nodes, but
+ // needs to be recursive for derived Phis.
+ Node *find_base_for_derived( Node **derived_base_map, Node *derived, uint &maxlrg );
+
+ // Set the was-lo-degree bit. Conservative coalescing should not change the
+ // colorability of the graph. If any live range was of low-degree before
+ // coalescing, it should Simplify. This call sets the was-lo-degree bit.
+ void set_was_low();
+
+ // Split live-ranges that must spill due to register conflicts (as opposed
+ // to capacity spills). Typically these are things def'd in a register
+ // and used on the stack or vice-versa.
+ void pre_spill();
+
+ // Init LRG caching of degree, numregs. Init lo_degree list.
+ void cache_lrg_info( );
+
+ // Simplify the IFG by removing LRGs of low degree with no copies
+ void Pre_Simplify();
+
+ // Simplify the IFG by removing LRGs of low degree
+ void Simplify();
+
+ // Select colors by re-inserting edges into the IFG.
+ // Return TRUE if any spills occurred.
+ uint Select( );
+ // Helper function for select which allows biased coloring
+ OptoReg::Name choose_color( LRG &lrg, int chunk );
+ // Helper function which implements biasing heuristic
+ OptoReg::Name bias_color( LRG &lrg, int chunk );
+
+ // Split uncolorable live ranges
+ // Return new number of live ranges
+ uint Split( uint maxlrg );
+
+ // Copy 'was_spilled'-edness from one Node to another.
+ void copy_was_spilled( Node *src, Node *dst );
+ // Set the 'spilled_once' or 'spilled_twice' flag on a node.
+ void set_was_spilled( Node *n );
+
+ // Convert ideal spill-nodes into machine loads & stores
+ // Set C->failing() when fixup of spills cannot complete because the node limit is exceeded.
+ void fixup_spills();
+
+ // Post-Allocation peephole copy removal
+ void post_allocate_copy_removal();
+ Node *skip_copies( Node *c );
+ int yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd );
+ int elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs );
+ int use_prior_register( Node *copy, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd );
+ bool may_be_copy_of_callee( Node *def ) const;
+
+ // If nreg already contains the same constant as val then eliminate it
+ bool eliminate_copy_of_constant(Node* val, Block *current_block, Node_List& value, Node_List &regnd,
+ OptoReg::Name nreg, OptoReg::Name nreg2);
+ // Extend the node to LRG mapping
+ void add_reference( const Node *node, const Node *old_node);
+
+private:
+
+ static int _final_loads, _final_stores, _final_copies, _final_memoves;
+ static double _final_load_cost, _final_store_cost, _final_copy_cost, _final_memove_cost;
+ static int _conserv_coalesce, _conserv_coalesce_pair;
+ static int _conserv_coalesce_trie, _conserv_coalesce_quad;
+ static int _post_alloc;
+ static int _lost_opp_pp_coalesce, _lost_opp_cflow_coalesce;
+ static int _used_cisc_instructions, _unused_cisc_instructions;
+ static int _allocator_attempts, _allocator_successes;
+
+#ifndef PRODUCT
+ static uint _high_pressure, _low_pressure;
+
+ void dump() const;
+ void dump( const Node *n ) const;
+ void dump( const Block * b ) const;
+ void dump_degree_lists() const;
+ void dump_simplified() const;
+ void dump_lrg( uint lidx ) const;
+ void dump_bb( uint pre_order ) const;
+
+ // Verify that base pointers and derived pointers are still sane
+ void verify_base_ptrs( ResourceArea *a ) const;
+
+ void dump_for_spill_split_recycle() const;
+
+public:
+ void dump_frame() const;
+ char *dump_register( const Node *n, char *buf ) const;
+private:
+ static void print_chaitin_statistics();
+#endif
+ friend class PhaseCoalesce;
+ friend class PhaseAggressiveCoalesce;
+ friend class PhaseConservativeCoalesce;
+};
diff --git a/src/share/vm/opto/classes.cpp b/src/share/vm/opto/classes.cpp
new file mode 100644
index 000000000..f8cef8a47
--- /dev/null
+++ b/src/share/vm/opto/classes.cpp
@@ -0,0 +1,34 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_classes.cpp.incl"
+
+// ----------------------------------------------------------------------------
+// Build a table of virtual functions to map from Nodes to dense integer
+// opcode names.
+int Node::Opcode() const { return Op_Node; }
+#define macro(x) int x##Node::Opcode() const { return Op_##x; }
+#include "classes.hpp"
+#undef macro
diff --git a/src/share/vm/opto/classes.hpp b/src/share/vm/opto/classes.hpp
new file mode 100644
index 000000000..26eff2b4e
--- /dev/null
+++ b/src/share/vm/opto/classes.hpp
@@ -0,0 +1,308 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// The giant table of Node classes.
+// One entry per class, sorted by class name.
+
+macro(AbsD)
+macro(AbsF)
+macro(AbsI)
+macro(AddD)
+macro(AddF)
+macro(AddI)
+macro(AddL)
+macro(AddP)
+macro(Allocate)
+macro(AllocateArray)
+macro(AndI)
+macro(AndL)
+macro(AtanD)
+macro(Binary)
+macro(Bool)
+macro(BoxLock)
+macro(ReverseBytesI)
+macro(ReverseBytesL)
+macro(CProj)
+macro(CallDynamicJava)
+macro(CallJava)
+macro(CallLeaf)
+macro(CallLeafNoFP)
+macro(CallRuntime)
+macro(CallStaticJava)
+macro(CastII)
+macro(CastX2P)
+macro(CastP2X)
+macro(CastPP)
+macro(Catch)
+macro(CatchProj)
+macro(CheckCastPP)
+macro(ClearArray)
+macro(ConstraintCast)
+macro(CMoveD)
+macro(CMoveF)
+macro(CMoveI)
+macro(CMoveL)
+macro(CMoveP)
+macro(CmpD)
+macro(CmpD3)
+macro(CmpF)
+macro(CmpF3)
+macro(CmpI)
+macro(CmpL)
+macro(CmpL3)
+macro(CmpLTMask)
+macro(CmpP)
+macro(CmpU)
+macro(CompareAndSwapI)
+macro(CompareAndSwapL)
+macro(CompareAndSwapP)
+macro(Con)
+macro(ConD)
+macro(ConF)
+macro(ConI)
+macro(ConL)
+macro(ConP)
+macro(Conv2B)
+macro(ConvD2F)
+macro(ConvD2I)
+macro(ConvD2L)
+macro(ConvF2D)
+macro(ConvF2I)
+macro(ConvF2L)
+macro(ConvI2D)
+macro(ConvI2F)
+macro(ConvI2L)
+macro(ConvL2D)
+macro(ConvL2F)
+macro(ConvL2I)
+macro(CosD)
+macro(CountedLoop)
+macro(CountedLoopEnd)
+macro(CreateEx)
+macro(DivD)
+macro(DivF)
+macro(DivI)
+macro(DivL)
+macro(DivMod)
+macro(DivModI)
+macro(DivModL)
+macro(ExpD)
+macro(FastLock)
+macro(FastUnlock)
+macro(Goto)
+macro(Halt)
+macro(If)
+macro(IfFalse)
+macro(IfTrue)
+macro(Initialize)
+macro(JProj)
+macro(Jump)
+macro(JumpProj)
+macro(LShiftI)
+macro(LShiftL)
+macro(LoadB)
+macro(LoadC)
+macro(LoadD)
+macro(LoadD_unaligned)
+macro(LoadF)
+macro(LoadI)
+macro(LoadKlass)
+macro(LoadL)
+macro(LoadL_unaligned)
+macro(LoadPLocked)
+macro(LoadLLocked)
+macro(LoadP)
+macro(LoadRange)
+macro(LoadS)
+macro(Lock)
+macro(LogD)
+macro(Log10D)
+macro(Loop)
+macro(Mach)
+macro(MachProj)
+macro(MaxI)
+macro(MemBarAcquire)
+macro(MemBarCPUOrder)
+macro(MemBarRelease)
+macro(MemBarVolatile)
+macro(MergeMem)
+macro(MinI)
+macro(ModD)
+macro(ModF)
+macro(ModI)
+macro(ModL)
+macro(MoveI2F)
+macro(MoveF2I)
+macro(MoveL2D)
+macro(MoveD2L)
+macro(MulD)
+macro(MulF)
+macro(MulI)
+macro(MulL)
+macro(Multi)
+macro(NegD)
+macro(NegF)
+macro(NeverBranch)
+macro(Opaque1)
+macro(Opaque2)
+macro(OrI)
+macro(OrL)
+macro(PCTable)
+macro(Parm)
+macro(PartialSubtypeCheck)
+macro(Phi)
+macro(PowD)
+macro(PrefetchRead)
+macro(PrefetchWrite)
+macro(Proj)
+macro(RShiftI)
+macro(RShiftL)
+macro(Region)
+macro(Rethrow)
+macro(Return)
+macro(Root)
+macro(RoundDouble)
+macro(RoundFloat)
+macro(SafePoint)
+macro(SCMemProj)
+macro(SinD)
+macro(SqrtD)
+macro(Start)
+macro(StartOSR)
+macro(StoreB)
+macro(StoreC)
+macro(StoreCM)
+macro(StorePConditional)
+macro(StoreLConditional)
+macro(StoreD)
+macro(StoreF)
+macro(StoreI)
+macro(StoreL)
+macro(StoreP)
+macro(StrComp)
+macro(SubD)
+macro(SubF)
+macro(SubI)
+macro(SubL)
+macro(TailCall)
+macro(TailJump)
+macro(TanD)
+macro(ThreadLocal)
+macro(Unlock)
+macro(URShiftI)
+macro(URShiftL)
+macro(XorI)
+macro(XorL)
+macro(Vector)
+macro(AddVB)
+macro(AddVC)
+macro(AddVS)
+macro(AddVI)
+macro(AddVL)
+macro(AddVF)
+macro(AddVD)
+macro(SubVB)
+macro(SubVC)
+macro(SubVS)
+macro(SubVI)
+macro(SubVL)
+macro(SubVF)
+macro(SubVD)
+macro(MulVF)
+macro(MulVD)
+macro(DivVF)
+macro(DivVD)
+macro(LShiftVB)
+macro(LShiftVC)
+macro(LShiftVS)
+macro(LShiftVI)
+macro(URShiftVB)
+macro(URShiftVC)
+macro(URShiftVS)
+macro(URShiftVI)
+macro(AndV)
+macro(OrV)
+macro(XorV)
+macro(VectorLoad)
+macro(Load16B)
+macro(Load8B)
+macro(Load4B)
+macro(Load8C)
+macro(Load4C)
+macro(Load2C)
+macro(Load8S)
+macro(Load4S)
+macro(Load2S)
+macro(Load4I)
+macro(Load2I)
+macro(Load2L)
+macro(Load4F)
+macro(Load2F)
+macro(Load2D)
+macro(VectorStore)
+macro(Store16B)
+macro(Store8B)
+macro(Store4B)
+macro(Store8C)
+macro(Store4C)
+macro(Store2C)
+macro(Store4I)
+macro(Store2I)
+macro(Store2L)
+macro(Store4F)
+macro(Store2F)
+macro(Store2D)
+macro(Pack)
+macro(PackB)
+macro(PackS)
+macro(PackC)
+macro(PackI)
+macro(PackL)
+macro(PackF)
+macro(PackD)
+macro(Pack2x1B)
+macro(Pack2x2B)
+macro(Replicate16B)
+macro(Replicate8B)
+macro(Replicate4B)
+macro(Replicate8S)
+macro(Replicate4S)
+macro(Replicate2S)
+macro(Replicate8C)
+macro(Replicate4C)
+macro(Replicate2C)
+macro(Replicate4I)
+macro(Replicate2I)
+macro(Replicate2L)
+macro(Replicate4F)
+macro(Replicate2F)
+macro(Replicate2D)
+macro(Extract)
+macro(ExtractB)
+macro(ExtractS)
+macro(ExtractC)
+macro(ExtractI)
+macro(ExtractL)
+macro(ExtractF)
+macro(ExtractD)
diff --git a/src/share/vm/opto/coalesce.cpp b/src/share/vm/opto/coalesce.cpp
new file mode 100644
index 000000000..20e9bd179
--- /dev/null
+++ b/src/share/vm/opto/coalesce.cpp
@@ -0,0 +1,915 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_coalesce.cpp.incl"
+
+//=============================================================================
+//------------------------------reset_uf_map-----------------------------------
+void PhaseChaitin::reset_uf_map( uint maxlrg ) {
+ _maxlrg = maxlrg;
+ // Force the Union-Find mapping to be at least this large
+ _uf_map.extend(_maxlrg,0);
+ // Initialize it to be the ID mapping.
+ for( uint i=0; i<_maxlrg; i++ )
+ _uf_map.map(i,i);
+}
+
+//------------------------------compress_uf_map--------------------------------
+// Make all Nodes map directly to their final live range; no need for
+// the Union-Find mapping after this call.
+void PhaseChaitin::compress_uf_map_for_nodes( ) {
+ // For all Nodes, compress mapping
+ uint unique = _names.Size();
+ for( uint i=0; i<unique; i++ ) {
+ uint lrg = _names[i];
+ uint compressed_lrg = Find(lrg);
+ if( lrg != compressed_lrg )
+ _names.map(i,compressed_lrg);
+ }
+}
+
+//------------------------------Find-------------------------------------------
+// Straight out of Tarjan's union-find algorithm
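+// Illustrative example (not part of the original comment): given a chain of
+// equivalences such as 9 -> 7 -> 3 -> 3, Find_compress(9) walks down to the
+// root 3 and then rewrites the map so that 9 and 7 both point directly at 3,
+// shortening the chain for later queries.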
+uint PhaseChaitin::Find_compress( uint lrg ) {
+ uint cur = lrg;
+ uint next = _uf_map[cur];
+ while( next != cur ) { // Scan chain of equivalences
+ assert( next < cur, "always union smaller" );
+ cur = next; // until find a fixed-point
+ next = _uf_map[cur];
+ }
+ // Core of union-find algorithm: update chain of
+ // equivalences to be equal to the root.
+ while( lrg != next ) {
+ uint tmp = _uf_map[lrg];
+ _uf_map.map(lrg, next);
+ lrg = tmp;
+ }
+ return lrg;
+}
+
+//------------------------------Find-------------------------------------------
+// Straight out of Tarjan's union-find algorithm
+uint PhaseChaitin::Find_compress( const Node *n ) {
+ uint lrg = Find_compress(_names[n->_idx]);
+ _names.map(n->_idx,lrg);
+ return lrg;
+}
+
+//------------------------------Find_const-------------------------------------
+// Like Find above, but no path compression, so bad asymptotic behavior
+uint PhaseChaitin::Find_const( uint lrg ) const {
+ if( !lrg ) return lrg; // Ignore the zero LRG
+  // Off the end? This happens during debugging dumps when you have
+ // brand new live ranges but have not told the allocator yet.
+ if( lrg >= _maxlrg ) return lrg;
+ uint next = _uf_map[lrg];
+ while( next != lrg ) { // Scan chain of equivalences
+ assert( next < lrg, "always union smaller" );
+ lrg = next; // until find a fixed-point
+ next = _uf_map[lrg];
+ }
+ return next;
+}
+
+//------------------------------Find-------------------------------------------
+// Like Find above, but no path compression, so bad asymptotic behavior
+uint PhaseChaitin::Find_const( const Node *n ) const {
+ if( n->_idx >= _names.Size() ) return 0; // not mapped, usual for debug dump
+ return Find_const( _names[n->_idx] );
+}
+
+//------------------------------Union------------------------------------------
+// union 2 sets together.
+void PhaseChaitin::Union( const Node *src_n, const Node *dst_n ) {
+ uint src = Find(src_n);
+ uint dst = Find(dst_n);
+ assert( src, "" );
+ assert( dst, "" );
+ assert( src < _maxlrg, "oob" );
+ assert( dst < _maxlrg, "oob" );
+ assert( src < dst, "always union smaller" );
+ _uf_map.map(dst,src);
+}
+
+//------------------------------new_lrg----------------------------------------
+void PhaseChaitin::new_lrg( const Node *x, uint lrg ) {
+ // Make the Node->LRG mapping
+ _names.extend(x->_idx,lrg);
+ // Make the Union-Find mapping an identity function
+ _uf_map.extend(lrg,lrg);
+}
+
+//------------------------------clone_projs------------------------------------
+// After cloning some rematerialized instruction, clone any MachProj's that
+// follow it. Example: Intel zero is XOR, kills flags. Sparc FP constants
+// use G3 as an address temp.
+int PhaseChaitin::clone_projs( Block *b, uint idx, Node *con, Node *copy, uint &maxlrg ) {
+ Block *bcon = _cfg._bbs[con->_idx];
+ uint cindex = bcon->find_node(con);
+ Node *con_next = bcon->_nodes[cindex+1];
+ if( con_next->in(0) != con || con_next->Opcode() != Op_MachProj )
+ return false; // No MachProj's follow
+
+ // Copy kills after the cloned constant
+ Node *kills = con_next->clone();
+ kills->set_req( 0, copy );
+ b->_nodes.insert( idx, kills );
+ _cfg._bbs.map( kills->_idx, b );
+ new_lrg( kills, maxlrg++ );
+ return true;
+}
+
+//------------------------------compact----------------------------------------
+// Renumber the live ranges to compact them. Makes the IFG smaller.
+void PhaseChaitin::compact() {
+  // Currently, the _uf_map contains a series of short chains which are headed
+ // by a self-cycle. All the chains run from big numbers to little numbers.
+ // The Find() call chases the chains & shortens them for the next Find call.
+ // We are going to change this structure slightly. Numbers above a moving
+ // wave 'i' are unchanged. Numbers below 'j' point directly to their
+ // compacted live range with no further chaining. There are no chains or
+ // cycles below 'i', so the Find call no longer works.
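+  //
+  // Illustrative sketch (hypothetical numbers, not part of the original
+  // comment): a map of { 1->1, 2->2, 3->2, 4->4, 5->4 } has roots 1, 2 and 4;
+  // after compaction the roots are renamed 1, 2 and 3 and every entry points
+  // directly at its compacted name, with no chains left to chase.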
+ uint j=1;
+ uint i;
+ for( i=1; i < _maxlrg; i++ ) {
+ uint lr = _uf_map[i];
+ // Ignore unallocated live ranges
+ if( !lr ) continue;
+ assert( lr <= i, "" );
+ _uf_map.map(i, ( lr == i ) ? j++ : _uf_map[lr]);
+ }
+ if( false ) // PrintOptoCompactLiveRanges
+ printf("Compacted %d LRs from %d\n",i-j,i);
+ // Now change the Node->LR mapping to reflect the compacted names
+ uint unique = _names.Size();
+ for( i=0; i<unique; i++ )
+ _names.map(i,_uf_map[_names[i]]);
+
+ // Reset the Union-Find mapping
+ reset_uf_map(j);
+
+}
+
+//=============================================================================
+//------------------------------Dump-------------------------------------------
+#ifndef PRODUCT
+void PhaseCoalesce::dump( Node *n ) const {
+ // Being a const function means I cannot use 'Find'
+ uint r = _phc.Find(n);
+ tty->print("L%d/N%d ",r,n->_idx);
+}
+
+//------------------------------dump-------------------------------------------
+void PhaseCoalesce::dump() const {
+ // I know I have a block layout now, so I can print blocks in a loop
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+ uint j;
+ Block *b = _phc._cfg._blocks[i];
+ // Print a nice block header
+ tty->print("B%d: ",b->_pre_order);
+ for( j=1; j<b->num_preds(); j++ )
+ tty->print("B%d ", _phc._cfg._bbs[b->pred(j)->_idx]->_pre_order);
+ tty->print("-> ");
+ for( j=0; j<b->_num_succs; j++ )
+ tty->print("B%d ",b->_succs[j]->_pre_order);
+ tty->print(" IDom: B%d/#%d\n", b->_idom ? b->_idom->_pre_order : 0, b->_dom_depth);
+ uint cnt = b->_nodes.size();
+ for( j=0; j<cnt; j++ ) {
+ Node *n = b->_nodes[j];
+ dump( n );
+ tty->print("\t%s\t",n->Name());
+
+ // Dump the inputs
+ uint k; // Exit value of loop
+ for( k=0; k<n->req(); k++ ) // For all required inputs
+ if( n->in(k) ) dump( n->in(k) );
+ else tty->print("_ ");
+ int any_prec = 0;
+ for( ; k<n->len(); k++ ) // For all precedence inputs
+ if( n->in(k) ) {
+ if( !any_prec++ ) tty->print(" |");
+ dump( n->in(k) );
+ }
+
+ // Dump node-specific info
+ n->dump_spec(tty);
+ tty->print("\n");
+
+ }
+ tty->print("\n");
+ }
+}
+#endif
+
+//------------------------------combine_these_two------------------------------
+// Combine the live ranges def'd by these 2 Nodes. N2 is an input to N1.
+void PhaseCoalesce::combine_these_two( Node *n1, Node *n2 ) {
+ uint lr1 = _phc.Find(n1);
+ uint lr2 = _phc.Find(n2);
+ if( lr1 != lr2 && // Different live ranges already AND
+ !_phc._ifg->test_edge_sq( lr1, lr2 ) ) { // Do not interfere
+ LRG *lrg1 = &_phc.lrgs(lr1);
+ LRG *lrg2 = &_phc.lrgs(lr2);
+ // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
+
+ // Now, why is int->oop OK? We end up declaring a raw-pointer as an oop
+ // and in general that's a bad thing. However, int->oop conversions only
+ // happen at GC points, so the lifetime of the misclassified raw-pointer
+ // is from the CheckCastPP (that converts it to an oop) backwards up
+ // through a merge point and into the slow-path call, and around the
+ // diamond up to the heap-top check and back down into the slow-path call.
+ // The misclassified raw pointer is NOT live across the slow-path call,
+ // and so does not appear in any GC info, so the fact that it is
+ // misclassified is OK.
+
+ if( (lrg1->_is_oop || !lrg2->_is_oop) && // not an oop->int cast AND
+ // Compatible final mask
+ lrg1->mask().overlap( lrg2->mask() ) ) {
+ // Merge larger into smaller.
+ if( lr1 > lr2 ) {
+ uint tmp = lr1; lr1 = lr2; lr2 = tmp;
+ Node *n = n1; n1 = n2; n2 = n;
+ LRG *ltmp = lrg1; lrg1 = lrg2; lrg2 = ltmp;
+ }
+ // Union lr2 into lr1
+ _phc.Union( n1, n2 );
+ if (lrg1->_maxfreq < lrg2->_maxfreq)
+ lrg1->_maxfreq = lrg2->_maxfreq;
+ // Merge in the IFG
+ _phc._ifg->Union( lr1, lr2 );
+ // Combine register restrictions
+ lrg1->AND(lrg2->mask());
+ }
+ }
+}
+
+//------------------------------coalesce_driver--------------------------------
+// Copy coalescing
+void PhaseCoalesce::coalesce_driver( ) {
+
+ verify();
+ // Coalesce from high frequency to low
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ )
+ coalesce( _phc._blks[i] );
+
+}
+
+//------------------------------insert_copy_with_overlap-----------------------
+// I am inserting copies to come out of SSA form. In the general case, I am
+// doing a parallel renaming. I'm in the Named world now, so I can't do a
+// general parallel renaming. All the copies now use "names" (live-ranges)
+// to carry values instead of the explicit use-def chains. Suppose I need to
+// insert 2 copies into the same block. They copy L161->L128 and L128->L132.
+// If I insert them in the wrong order then L128 will get clobbered before it
+// can get used by the second copy. This cannot happen in the SSA model;
+// direct use-def chains get me the right value. It DOES happen in the named
+// model so I have to handle the reordering of copies.
+//
+// In general, I need to topo-sort the placed copies to avoid conflicts.
+// It's possible to have a closed cycle of copies (e.g., recirculating the same
+// values around a loop). In this case I need a temp to break the cycle.
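+//
+// Worked example (illustrative, not part of the original comment): to place
+// the copies L161->L128 and L128->L132 in one block, the L128->L132 copy must
+// be emitted first so L128 is read before the L161->L128 copy clobbers it.
+// If the copies form a cycle, say L128->L132 and L132->L128, no ordering
+// works and a temp breaks it: L128->Ltmp, L132->L128, Ltmp->L132.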
+void PhaseAggressiveCoalesce::insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name ) {
+
+ // Scan backwards for the locations of the last use of the dst_name.
+ // I am about to clobber the dst_name, so the copy must be inserted
+ // after the last use. Last use is really first-use on a backwards scan.
+ uint i = b->end_idx()-1;
+ while( 1 ) {
+ Node *n = b->_nodes[i];
+ // Check for end of virtual copies; this is also the end of the
+ // parallel renaming effort.
+ if( n->_idx < _unique ) break;
+ uint idx = n->is_Copy();
+ assert( idx || n->is_Con() || n->Opcode() == Op_MachProj, "Only copies during parallel renaming" );
+ if( idx && _phc.Find(n->in(idx)) == dst_name ) break;
+ i--;
+ }
+ uint last_use_idx = i;
+
+ // Also search for any kill of src_name that exits the block.
+ // Since the copy uses src_name, I have to come before any kill.
+ uint kill_src_idx = b->end_idx();
+ // There can be only 1 kill that exits any block and that is
+ // the last kill. Thus it is the first kill on a backwards scan.
+ i = b->end_idx()-1;
+ while( 1 ) {
+ Node *n = b->_nodes[i];
+ // Check for end of virtual copies; this is also the end of the
+ // parallel renaming effort.
+ if( n->_idx < _unique ) break;
+ assert( n->is_Copy() || n->is_Con() || n->Opcode() == Op_MachProj, "Only copies during parallel renaming" );
+ if( _phc.Find(n) == src_name ) {
+ kill_src_idx = i;
+ break;
+ }
+ i--;
+ }
+ // Need a temp? Last use of dst comes after the kill of src?
+ if( last_use_idx >= kill_src_idx ) {
+ // Need to break a cycle with a temp
+ uint idx = copy->is_Copy();
+ Node *tmp = copy->clone();
+ _phc.new_lrg(tmp,_phc._maxlrg++);
+ // Insert new temp between copy and source
+ tmp ->set_req(idx,copy->in(idx));
+ copy->set_req(idx,tmp);
+ // Save source in temp early, before source is killed
+ b->_nodes.insert(kill_src_idx,tmp);
+ _phc._cfg._bbs.map( tmp->_idx, b );
+ last_use_idx++;
+ }
+
+ // Insert just after last use
+ b->_nodes.insert(last_use_idx+1,copy);
+}
+
+//------------------------------insert_copies----------------------------------
+void PhaseAggressiveCoalesce::insert_copies( Matcher &matcher ) {
+  // We do LRG compression and fix the liveout data only here, since the other
+  // place in Split() is guarded by an assert which we never hit.
+ _phc.compress_uf_map_for_nodes();
+ // Fix block's liveout data for compressed live ranges.
+ for(uint lrg = 1; lrg < _phc._maxlrg; lrg++ ) {
+ uint compressed_lrg = _phc.Find(lrg);
+ if( lrg != compressed_lrg ) {
+ for( uint bidx = 0; bidx < _phc._cfg._num_blocks; bidx++ ) {
+ IndexSet *liveout = _phc._live->live(_phc._cfg._blocks[bidx]);
+ if( liveout->member(lrg) ) {
+ liveout->remove(lrg);
+ liveout->insert(compressed_lrg);
+ }
+ }
+ }
+ }
+
+ // All new nodes added are actual copies to replace virtual copies.
+ // Nodes with index less than '_unique' are original, non-virtual Nodes.
+ _unique = C->unique();
+
+ for( uint i=0; i<_phc._cfg._num_blocks; i++ ) {
+ Block *b = _phc._cfg._blocks[i];
+ uint cnt = b->num_preds(); // Number of inputs to the Phi
+
+ for( uint l = 1; l<b->_nodes.size(); l++ ) {
+ Node *n = b->_nodes[l];
+
+ // Do not use removed-copies, use copied value instead
+ uint ncnt = n->req();
+ for( uint k = 1; k<ncnt; k++ ) {
+ Node *copy = n->in(k);
+ uint cidx = copy->is_Copy();
+ if( cidx ) {
+ Node *def = copy->in(cidx);
+ if( _phc.Find(copy) == _phc.Find(def) )
+ n->set_req(k,def);
+ }
+ }
+
+ // Remove any explicit copies that get coalesced.
+ uint cidx = n->is_Copy();
+ if( cidx ) {
+ Node *def = n->in(cidx);
+ if( _phc.Find(n) == _phc.Find(def) ) {
+ n->replace_by(def);
+ n->set_req(cidx,NULL);
+ b->_nodes.remove(l);
+ l--;
+ continue;
+ }
+ }
+
+ if( n->is_Phi() ) {
+ // Get the chosen name for the Phi
+ uint phi_name = _phc.Find( n );
+ // Ignore the pre-allocated specials
+ if( !phi_name ) continue;
+ // Check for mismatch inputs to Phi
+ for( uint j = 1; j<cnt; j++ ) {
+ Node *m = n->in(j);
+ uint src_name = _phc.Find(m);
+ if( src_name != phi_name ) {
+ Block *pred = _phc._cfg._bbs[b->pred(j)->_idx];
+ Node *copy;
+ assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
+ // Rematerialize constants instead of copying them
+ if( m->is_Mach() && m->as_Mach()->is_Con() &&
+ m->as_Mach()->rematerialize() ) {
+ copy = m->clone();
+ // Insert the copy in the predecessor basic block
+ pred->add_inst(copy);
+ // Copy any flags as well
+ _phc.clone_projs( pred, pred->end_idx(), m, copy, _phc._maxlrg );
+ } else {
+ const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
+ copy = new (C) MachSpillCopyNode(m,*rm,*rm);
+ // Find a good place to insert. Kinda tricky, use a subroutine
+ insert_copy_with_overlap(pred,copy,phi_name,src_name);
+ }
+ // Insert the copy in the use-def chain
+ n->set_req( j, copy );
+ _phc._cfg._bbs.map( copy->_idx, pred );
+ // Extend ("register allocate") the names array for the copy.
+ _phc._names.extend( copy->_idx, phi_name );
+ } // End of if Phi names do not match
+ } // End of for all inputs to Phi
+ } else { // End of if Phi
+
+ // Now check for 2-address instructions
+ uint idx;
+ if( n->is_Mach() && (idx=n->as_Mach()->two_adr()) ) {
+ // Get the chosen name for the Node
+ uint name = _phc.Find( n );
+ assert( name, "no 2-address specials" );
+ // Check for name mis-match on the 2-address input
+ Node *m = n->in(idx);
+ if( _phc.Find(m) != name ) {
+ Node *copy;
+ assert(!m->is_Con() || m->is_Mach(), "all Con must be Mach");
+ // At this point it is unsafe to extend live ranges (6550579).
+ // Rematerialize only constants as we do for Phi above.
+ if( m->is_Mach() && m->as_Mach()->is_Con() &&
+ m->as_Mach()->rematerialize() ) {
+ copy = m->clone();
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ if( _phc.clone_projs( b, l, m, copy, _phc._maxlrg ) )
+ l++;
+ } else {
+ const RegMask *rm = C->matcher()->idealreg2spillmask[m->ideal_reg()];
+ copy = new (C) MachSpillCopyNode( m, *rm, *rm );
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ }
+ // Insert the copy in the use-def chain
+ n->set_req(idx, copy );
+ // Extend ("register allocate") the names array for the copy.
+ _phc._names.extend( copy->_idx, name );
+ _phc._cfg._bbs.map( copy->_idx, b );
+ }
+
+ } // End of is two-adr
+
+ // Insert a copy at a debug use for a lrg which has high frequency
+ if( (b->_freq < OPTO_DEBUG_SPLIT_FREQ) && n->is_MachSafePoint() ) {
+ // Walk the debug inputs to the node and check for lrg freq
+ JVMState* jvms = n->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ uint debug_end = jvms ? jvms->debug_end() : 999999;
+ for(uint inpidx = debug_start; inpidx < debug_end; inpidx++) {
+ // Do not split monitors; they are only needed for debug table
+ // entries and need no code.
+ if( jvms->is_monitor_use(inpidx) ) continue;
+ Node *inp = n->in(inpidx);
+ uint nidx = _phc.n2lidx(inp);
+ LRG &lrg = lrgs(nidx);
+
+ // If this lrg has a high frequency use/def
+ if( lrg._maxfreq >= OPTO_LRG_HIGH_FREQ ) {
+ // If the live range is also live out of this block (like it
+ // would be for a fast/slow idiom), the normal spill mechanism
+ // does an excellent job. If it is not live out of this block
+ // (like it would be for debug info to uncommon trap) splitting
+ // the live range now allows a better allocation in the high
+ // frequency blocks.
+ // Build_IFG_virtual has converted the live sets to
+ // live-IN info, not live-OUT info.
+ uint k;
+ for( k=0; k < b->_num_succs; k++ )
+ if( _phc._live->live(b->_succs[k])->member( nidx ) )
+ break; // Live in to some successor block?
+ if( k < b->_num_succs )
+ continue; // Live out; do not pre-split
+ // Split the lrg at this use
+ const RegMask *rm = C->matcher()->idealreg2spillmask[inp->ideal_reg()];
+ Node *copy = new (C) MachSpillCopyNode( inp, *rm, *rm );
+ // Insert the copy in the use-def chain
+ n->set_req(inpidx, copy );
+ // Insert the copy in the basic block, just before us
+ b->_nodes.insert( l++, copy );
+ // Extend ("register allocate") the names array for the copy.
+ _phc.new_lrg( copy, _phc._maxlrg++ );
+ _phc._cfg._bbs.map( copy->_idx, b );
+ //tty->print_cr("Split a debug use in Aggressive Coalesce");
+ } // End of if high frequency use/def
+ } // End of for all debug inputs
+ } // End of if low frequency safepoint
+
+ } // End of if Phi
+
+ } // End of for all instructions
+ } // End of for all blocks
+}
+
+//=============================================================================
+//------------------------------coalesce---------------------------------------
+// Aggressive (but pessimistic) copy coalescing of a single block
+
+// The following coalesce pass represents a single round of aggressive
+// pessimistic coalesce. "Aggressive" means no attempt to preserve
+// colorability when coalescing. This occasionally means more spills, but
+// it also means fewer rounds of coalescing for better code - and that means
+// faster compiles.
+
+// "Pessimistic" means we do not hit the fixed point in one pass (and we are
+// reaching for the least fixed point to boot). This is typically solved
+// with a few more rounds of coalescing, but the compiler must run fast. We
+// could optimistically coalesce everything touching PhiNodes together
+// into one big live range, then check for self-interference. Everywhere
+// the live range interferes with self it would have to be split. Finding
+// the right split points can be done with some heuristics (based on
+// expected frequency of edges in the live range). In short, it's a real
+// research problem and the timeline is too short to allow such research.
+// Further thoughts: (1) build the LR in a pass, (2) find self-interference
+// in another pass, (3) per each self-conflict, split, (4) split by finding
+// the low-cost cut (min-cut) of the LR, (5) edges in the LR are weighted
+// according to the GCM algorithm (or just exec freq on CFG edges).
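+//
+// (Illustrative note, an interpretation rather than part of the original
+// comment: in a single pessimistic pass, coalescing one copy may be exactly
+// what would make an already-visited copy legal to coalesce, but that earlier
+// copy is not revisited; reaching the fixed point would take more rounds than
+// the compile-time budget allows.)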
+
+void PhaseAggressiveCoalesce::coalesce( Block *b ) {
+ // Copies are still "virtual" - meaning we have not made them explicitly
+ // copies. Instead, Phi functions of successor blocks have mis-matched
+ // live-ranges. If I fail to coalesce, I'll have to insert a copy to line
+ // up the live-ranges. Check for Phis in successor blocks.
+ uint i;
+ for( i=0; i<b->_num_succs; i++ ) {
+ Block *bs = b->_succs[i];
+ // Find index of 'b' in 'bs' predecessors
+ uint j=1;
+ while( _phc._cfg._bbs[bs->pred(j)->_idx] != b ) j++;
+ // Visit all the Phis in successor block
+ for( uint k = 1; k<bs->_nodes.size(); k++ ) {
+ Node *n = bs->_nodes[k];
+ if( !n->is_Phi() ) break;
+ combine_these_two( n, n->in(j) );
+ }
+ } // End of for all successor blocks
+
+
+ // Check _this_ block for 2-address instructions and copies.
+ uint cnt = b->end_idx();
+ for( i = 1; i<cnt; i++ ) {
+ Node *n = b->_nodes[i];
+ uint idx;
+ // 2-address instructions have a virtual Copy matching their input
+ // to their output
+ if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
+ MachNode *mach = n->as_Mach();
+ combine_these_two( mach, mach->in(idx) );
+ }
+ } // End of for all instructions in block
+}
+
+//=============================================================================
+//------------------------------PhaseConservativeCoalesce----------------------
+PhaseConservativeCoalesce::PhaseConservativeCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {
+ _ulr.initialize(_phc._maxlrg);
+}
+
+//------------------------------verify-----------------------------------------
+void PhaseConservativeCoalesce::verify() {
+#ifdef ASSERT
+ _phc.set_was_low();
+#endif
+}
+
+//------------------------------union_helper-----------------------------------
+void PhaseConservativeCoalesce::union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
+ // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
+ // union-find tree
+ _phc.Union( lr1_node, lr2_node );
+
+ // Single-def live range ONLY if both live ranges are single-def.
+ // If both are single def, then src_def powers one live range
+ // and def_copy powers the other. After merging, src_def powers
+ // the combined live range.
+ lrgs(lr1)._def = (lrgs(lr1)._def == NodeSentinel ||
+ lrgs(lr2)._def == NodeSentinel )
+ ? NodeSentinel : src_def;
+ lrgs(lr2)._def = NULL; // No def for lrg 2
+ lrgs(lr2).Clear(); // Force empty mask for LRG 2
+ //lrgs(lr2)._size = 0; // Live-range 2 goes dead
+ lrgs(lr1)._is_oop |= lrgs(lr2)._is_oop;
+ lrgs(lr2)._is_oop = 0; // In particular, not an oop for GC info
+
+ if (lrgs(lr1)._maxfreq < lrgs(lr2)._maxfreq)
+ lrgs(lr1)._maxfreq = lrgs(lr2)._maxfreq;
+
+ // Copy original value instead. Intermediate copies go dead, and
+ // the dst_copy becomes useless.
+ int didx = dst_copy->is_Copy();
+ dst_copy->set_req( didx, src_def );
+ // Add copy to free list
+ // _phc.free_spillcopy(b->_nodes[bindex]);
+ assert( b->_nodes[bindex] == dst_copy, "" );
+ dst_copy->replace_by( dst_copy->in(didx) );
+ dst_copy->set_req( didx, NULL);
+ b->_nodes.remove(bindex);
+ if( bindex < b->_ihrp_index ) b->_ihrp_index--;
+ if( bindex < b->_fhrp_index ) b->_fhrp_index--;
+
+ // Stretched lr1; add it to liveness of intermediate blocks
+ Block *b2 = _phc._cfg._bbs[src_copy->_idx];
+ while( b != b2 ) {
+ b = _phc._cfg._bbs[b->pred(1)->_idx];
+ _phc._live->live(b)->insert(lr1);
+ }
+}
+
+//------------------------------compute_separating_interferences---------------
+// Factored code from copy_copy that computes extra interferences from
+// lengthening a live range by double-coalescing.
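+// (Illustrative note, not part of the original comment: when dst_copy and
+// src_copy are different nodes, coalescing them stretches the combined live
+// range across every instruction between the two copies, so each value
+// defined in that stretch becomes a new interference that must be counted
+// against the register degree.)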
+uint PhaseConservativeCoalesce::compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint reg_degree, uint rm_size, uint lr1, uint lr2 ) {
+
+ assert(!lrgs(lr1)._fat_proj, "cannot coalesce fat_proj");
+ assert(!lrgs(lr2)._fat_proj, "cannot coalesce fat_proj");
+ Node *prev_copy = dst_copy->in(dst_copy->is_Copy());
+ Block *b2 = b;
+ uint bindex2 = bindex;
+ while( 1 ) {
+ // Find previous instruction
+ bindex2--; // Chain backwards 1 instruction
+ while( bindex2 == 0 ) { // At block start, find prior block
+ assert( b2->num_preds() == 2, "cannot double coalesce across c-flow" );
+ b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+ bindex2 = b2->end_idx()-1;
+ }
+ // Get prior instruction
+ assert(bindex2 < b2->_nodes.size(), "index out of bounds");
+ Node *x = b2->_nodes[bindex2];
+ if( x == prev_copy ) { // Previous copy in copy chain?
+ if( prev_copy == src_copy)// Found end of chain and all interferences
+ break; // So break out of loop
+ // Else work back one in copy chain
+ prev_copy = prev_copy->in(prev_copy->is_Copy());
+ } else { // Else collect interferences
+ uint lidx = _phc.Find(x);
+ // Found another def of live-range being stretched?
+ if( lidx == lr1 ) return max_juint;
+ if( lidx == lr2 ) return max_juint;
+
+ // If we attempt to coalesce across a bound def
+ if( lrgs(lidx).is_bound() ) {
+ // Do not let the coalesced LRG expect to get the bound color
+ rm.SUBTRACT( lrgs(lidx).mask() );
+ // Recompute rm_size
+ rm_size = rm.Size();
+ //if( rm._flags ) rm_size += 1000000;
+ if( reg_degree >= rm_size ) return max_juint;
+ }
+ if( rm.overlap(lrgs(lidx).mask()) ) {
+ // Insert lidx into union LRG; returns TRUE if actually inserted
+ if( _ulr.insert(lidx) ) {
+ // Infinite-stack neighbors do not alter colorability, as they
+ // can always color to some other color.
+ if( !lrgs(lidx).mask().is_AllStack() ) {
+ // If this coalesce will make any new neighbor uncolorable,
+ // do not coalesce.
+ if( lrgs(lidx).just_lo_degree() )
+ return max_juint;
+ // Bump our degree
+ if( ++reg_degree >= rm_size )
+ return max_juint;
+ } // End of if not infinite-stack neighbor
+ } // End of if actually inserted
+ } // End of if live range overlaps
+    } // End of else collect interferences for 1 node
+  } // End of while forever, scan back for interferences
+ return reg_degree;
+}
+
+//------------------------------update_ifg-------------------------------------
+void PhaseConservativeCoalesce::update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2) {
+ // Some original neighbors of lr1 might have gone away
+ // because the constrained register mask prevented them.
+ // Remove lr1 from such neighbors.
+ IndexSetIterator one(n_lr1);
+ uint neighbor;
+ LRG &lrg1 = lrgs(lr1);
+ while ((neighbor = one.next()) != 0)
+ if( !_ulr.member(neighbor) )
+ if( _phc._ifg->neighbors(neighbor)->remove(lr1) )
+ lrgs(neighbor).inc_degree( -lrg1.compute_degree(lrgs(neighbor)) );
+
+
+ // lr2 is now called (coalesced into) lr1.
+ // Remove lr2 from the IFG.
+ IndexSetIterator two(n_lr2);
+ LRG &lrg2 = lrgs(lr2);
+ while ((neighbor = two.next()) != 0)
+ if( _phc._ifg->neighbors(neighbor)->remove(lr2) )
+ lrgs(neighbor).inc_degree( -lrg2.compute_degree(lrgs(neighbor)) );
+
+ // Some neighbors of intermediate copies now interfere with the
+ // combined live range.
+ IndexSetIterator three(&_ulr);
+ while ((neighbor = three.next()) != 0)
+ if( _phc._ifg->neighbors(neighbor)->insert(lr1) )
+ lrgs(neighbor).inc_degree( lrg1.compute_degree(lrgs(neighbor)) );
+}
+
+//------------------------------record_bias------------------------------------
+static void record_bias( const PhaseIFG *ifg, int lr1, int lr2 ) {
+ // Tag copy bias here
+ if( !ifg->lrgs(lr1)._copy_bias )
+ ifg->lrgs(lr1)._copy_bias = lr2;
+ if( !ifg->lrgs(lr2)._copy_bias )
+ ifg->lrgs(lr2)._copy_bias = lr1;
+}
+
+//------------------------------copy_copy--------------------------------------
+// See if I can coalesce a series of multiple copies together. I need the
+// final dest copy and the original src copy. They can be the same Node.
+// Compute the compatible register masks.
+bool PhaseConservativeCoalesce::copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex ) {
+
+ if( !dst_copy->is_SpillCopy() ) return false;
+ if( !src_copy->is_SpillCopy() ) return false;
+ Node *src_def = src_copy->in(src_copy->is_Copy());
+ uint lr1 = _phc.Find(dst_copy);
+ uint lr2 = _phc.Find(src_def );
+
+ // Same live ranges already?
+ if( lr1 == lr2 ) return false;
+
+ // Interfere?
+ if( _phc._ifg->test_edge_sq( lr1, lr2 ) ) return false;
+
+ // Not an oop->int cast; oop->oop, int->int, AND int->oop are OK.
+ if( !lrgs(lr1)._is_oop && lrgs(lr2)._is_oop ) // not an oop->int cast
+ return false;
+
+ // Coalescing between an aligned live range and a mis-aligned live range?
+ // No, no! Alignment changes how we count degree.
+ if( lrgs(lr1)._fat_proj != lrgs(lr2)._fat_proj )
+ return false;
+
+ // Sort; use smaller live-range number
+ Node *lr1_node = dst_copy;
+ Node *lr2_node = src_def;
+ if( lr1 > lr2 ) {
+ uint tmp = lr1; lr1 = lr2; lr2 = tmp;
+ lr1_node = src_def; lr2_node = dst_copy;
+ }
+
+ // Check for compatibility of the 2 live ranges by
+ // intersecting their allowed register sets.
+ RegMask rm = lrgs(lr1).mask();
+ rm.AND(lrgs(lr2).mask());
+ // Number of bits free
+ uint rm_size = rm.Size();
+
+ // If we can use any stack slot, then effective size is infinite
+ if( rm.is_AllStack() ) rm_size += 1000000;
+ // Incompatible masks, no way to coalesce
+ if( rm_size == 0 ) return false;
+
+ // Another early bail-out test is when we are double-coalescing and the
+  // 2 copies are separated by some control flow.
+ if( dst_copy != src_copy ) {
+ Block *src_b = _phc._cfg._bbs[src_copy->_idx];
+ Block *b2 = b;
+ while( b2 != src_b ) {
+ if( b2->num_preds() > 2 ){// Found merge-point
+ _phc._lost_opp_cflow_coalesce++;
+ // extra record_bias commented out because Chris believes it is not
+ // productive. Since we can record only 1 bias, we want to choose one
+ // that stands a chance of working and this one probably does not.
+ //record_bias( _phc._lrgs, lr1, lr2 );
+        return false; // Too hard to find all interferences
+ }
+ b2 = _phc._cfg._bbs[b2->pred(1)->_idx];
+ }
+ }
+
+ // Union the two interference sets together into '_ulr'
+ uint reg_degree = _ulr.lrg_union( lr1, lr2, rm_size, _phc._ifg, rm );
+
+ if( reg_degree >= rm_size ) {
+ record_bias( _phc._ifg, lr1, lr2 );
+ return false;
+ }
+
+ // Now I need to compute all the interferences between dst_copy and
+  // src_copy. I'm not willing to visit the entire interference graph, so
+ // I limit my search to things in dst_copy's block or in a straight
+ // line of previous blocks. I give up at merge points or when I get
+ // more interferences than my degree. I can stop when I find src_copy.
+ if( dst_copy != src_copy ) {
+ reg_degree = compute_separating_interferences(dst_copy, src_copy, b, bindex, rm, rm_size, reg_degree, lr1, lr2 );
+ if( reg_degree == max_juint ) {
+ record_bias( _phc._ifg, lr1, lr2 );
+ return false;
+ }
+ } // End of if dst_copy & src_copy are different
+
+
+ // ---- THE COMBINED LRG IS COLORABLE ----
+
+ // YEAH - Now coalesce this copy away
+ assert( lrgs(lr1).num_regs() == lrgs(lr2).num_regs(), "" );
+
+ IndexSet *n_lr1 = _phc._ifg->neighbors(lr1);
+ IndexSet *n_lr2 = _phc._ifg->neighbors(lr2);
+
+ // Update the interference graph
+ update_ifg(lr1, lr2, n_lr1, n_lr2);
+
+ _ulr.remove(lr1);
+
+ // Uncomment the following code to trace Coalescing in great detail.
+ //
+ //if (false) {
+ // tty->cr();
+ // tty->print_cr("#######################################");
+ // tty->print_cr("union %d and %d", lr1, lr2);
+ // n_lr1->dump();
+ // n_lr2->dump();
+ // tty->print_cr("resulting set is");
+ // _ulr.dump();
+ //}
+
+ // Replace n_lr1 with the new combined live range. _ulr will use
+ // n_lr1's old memory on the next iteration. n_lr2 is cleared to
+ // send its internal memory to the free list.
+ _ulr.swap(n_lr1);
+ _ulr.clear();
+ n_lr2->clear();
+
+ lrgs(lr1).set_degree( _phc._ifg->effective_degree(lr1) );
+ lrgs(lr2).set_degree( 0 );
+
+ // Join live ranges. Merge larger into smaller. Union lr2 into lr1 in the
+ // union-find tree
+ union_helper( lr1_node, lr2_node, lr1, lr2, src_def, dst_copy, src_copy, b, bindex );
+ // Combine register restrictions
+ lrgs(lr1).set_mask(rm);
+ lrgs(lr1).compute_set_mask_size();
+ lrgs(lr1)._cost += lrgs(lr2)._cost;
+ lrgs(lr1)._area += lrgs(lr2)._area;
+
+  // While it's uncommon to successfully coalesce live ranges that started out
+  // being not-lo-degree, it can happen. In any case the combined coalesced
+  // live range had better Simplify nicely.
+ lrgs(lr1)._was_lo = 1;
+
+ // kinda expensive to do all the time
+ //tty->print_cr("warning: slow verify happening");
+ //_phc._ifg->verify( &_phc );
+ return true;
+}
+
+//------------------------------coalesce---------------------------------------
+// Conservative (but pessimistic) copy coalescing of a single block
+void PhaseConservativeCoalesce::coalesce( Block *b ) {
+ // Bail out on infrequent blocks
+ if( b->is_uncommon(_phc._cfg._bbs) )
+ return;
+ // Check this block for copies.
+ for( uint i = 1; i<b->end_idx(); i++ ) {
+ // Check for actual copies on inputs. Coalesce a copy into its
+ // input if use and copy's input are compatible.
+ Node *copy1 = b->_nodes[i];
+ uint idx1 = copy1->is_Copy();
+ if( !idx1 ) continue; // Not a copy
+
+ if( copy_copy(copy1,copy1,b,i) ) {
+ i--; // Retry, same location in block
+ PhaseChaitin::_conserv_coalesce++; // Collect stats on success
+ continue;
+ }
+
+ /* do not attempt pairs. About 1/2 of all pairs can be removed by
+ post-alloc. The other set are too few to bother.
+ Node *copy2 = copy1->in(idx1);
+ uint idx2 = copy2->is_Copy();
+ if( !idx2 ) continue;
+ if( copy_copy(copy1,copy2,b,i) ) {
+ i--; // Retry, same location in block
+ PhaseChaitin::_conserv_coalesce_pair++; // Collect stats on success
+ continue;
+ }
+ */
+ }
+}
diff --git a/src/share/vm/opto/coalesce.hpp b/src/share/vm/opto/coalesce.hpp
new file mode 100644
index 000000000..b7cd9da87
--- /dev/null
+++ b/src/share/vm/opto/coalesce.hpp
@@ -0,0 +1,109 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class LoopTree;
+class LRG;
+class LRG_List;
+class Matcher;
+class PhaseIFG;
+class PhaseCFG;
+
+//------------------------------PhaseCoalesce----------------------------------
+class PhaseCoalesce : public Phase {
+protected:
+ PhaseChaitin &_phc;
+
+public:
+ // Coalesce copies
+ PhaseCoalesce( PhaseChaitin &chaitin ) : Phase(Coalesce), _phc(chaitin) { }
+
+ virtual void verify() = 0;
+
+ // Coalesce copies
+ void coalesce_driver( );
+
+ // Coalesce copies in this block
+ virtual void coalesce( Block *b ) = 0;
+
+ // Attempt to coalesce live ranges defined by these 2
+ void combine_these_two( Node *n1, Node *n2 );
+
+ LRG &lrgs( uint lidx ) { return _phc.lrgs(lidx); }
+#ifndef PRODUCT
+  // Dump internal name
+ void dump( Node *n ) const;
+ // Dump whole shebang
+ void dump() const;
+#endif
+};
+
+//------------------------------PhaseAggressiveCoalesce------------------------
+// Aggressive, pessimistic copy coalescing. Aggressive means ignore graph
+// colorability; perhaps coalescing to the point of forcing a spill.
+// Pessimistic means we cannot coalesce if 2 live ranges interfere. This
+// implies we do not hit a fixed point right away.
+class PhaseAggressiveCoalesce : public PhaseCoalesce {
+ uint _unique;
+public:
+ // Coalesce copies
+ PhaseAggressiveCoalesce( PhaseChaitin &chaitin ) : PhaseCoalesce(chaitin) {}
+
+ virtual void verify() { };
+
+ // Aggressively coalesce copies in this block
+ virtual void coalesce( Block *b );
+
+ // Where I fail to coalesce, manifest virtual copies as the Real Thing
+ void insert_copies( Matcher &matcher );
+
+ // Copy insertion needs some smarts in case live ranges overlap
+ void insert_copy_with_overlap( Block *b, Node *copy, uint dst_name, uint src_name );
+};
+
+
+//------------------------------PhaseConservativeCoalesce----------------------
+// Conservative, pessimistic copy coalescing. Conservative means do not
+// coalesce if the resultant live range will be uncolorable. Pessimistic
+// means we cannot coalesce if 2 live ranges interfere. This implies we do
+// not hit a fixed point right away.
+class PhaseConservativeCoalesce : public PhaseCoalesce {
+ IndexSet _ulr; // Union live range interferences
+public:
+ // Coalesce copies
+ PhaseConservativeCoalesce( PhaseChaitin &chaitin );
+
+ virtual void verify();
+
+ // Conservatively coalesce copies in this block
+ virtual void coalesce( Block *b );
+
+ // Coalesce this chain of copies away
+ bool copy_copy( Node *dst_copy, Node *src_copy, Block *b, uint bindex );
+
+ void union_helper( Node *lr1_node, Node *lr2_node, uint lr1, uint lr2, Node *src_def, Node *dst_copy, Node *src_copy, Block *b, uint bindex );
+
+ uint compute_separating_interferences(Node *dst_copy, Node *src_copy, Block *b, uint bindex, RegMask &rm, uint rm_size, uint reg_degree, uint lr1, uint lr2);
+
+ void update_ifg(uint lr1, uint lr2, IndexSet *n_lr1, IndexSet *n_lr2);
+};
diff --git a/src/share/vm/opto/compile.cpp b/src/share/vm/opto/compile.cpp
new file mode 100644
index 000000000..d62dbf344
--- /dev/null
+++ b/src/share/vm/opto/compile.cpp
@@ -0,0 +1,2384 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_compile.cpp.incl"
+
+/// Support for intrinsics.
+
+// Return the index at which m must be inserted (or already exists).
+// The sort order is by the address of the ciMethod, with is_virtual as minor key.
+int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
+#ifdef ASSERT
+ for (int i = 1; i < _intrinsics->length(); i++) {
+ CallGenerator* cg1 = _intrinsics->at(i-1);
+ CallGenerator* cg2 = _intrinsics->at(i);
+ assert(cg1->method() != cg2->method()
+ ? cg1->method() < cg2->method()
+ : cg1->is_virtual() < cg2->is_virtual(),
+ "compiler intrinsics list must stay sorted");
+ }
+#endif
+ // Binary search sorted list, in decreasing intervals [lo, hi].
+ int lo = 0, hi = _intrinsics->length()-1;
+ while (lo <= hi) {
+ int mid = (uint)(hi + lo) / 2;
+ ciMethod* mid_m = _intrinsics->at(mid)->method();
+ if (m < mid_m) {
+ hi = mid-1;
+ } else if (m > mid_m) {
+ lo = mid+1;
+ } else {
+ // look at minor sort key
+ bool mid_virt = _intrinsics->at(mid)->is_virtual();
+ if (is_virtual < mid_virt) {
+ hi = mid-1;
+ } else if (is_virtual > mid_virt) {
+ lo = mid+1;
+ } else {
+ return mid; // exact match
+ }
+ }
+ }
+ return lo; // inexact match
+}
+
+void Compile::register_intrinsic(CallGenerator* cg) {
+ if (_intrinsics == NULL) {
+ _intrinsics = new GrowableArray<CallGenerator*>(60);
+ }
+ // This code is stolen from ciObjectFactory::insert.
+ // Really, GrowableArray should have methods for
+ // insert_at, remove_at, and binary_search.
+ int len = _intrinsics->length();
+ int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
+ if (index == len) {
+ _intrinsics->append(cg);
+ } else {
+#ifdef ASSERT
+ CallGenerator* oldcg = _intrinsics->at(index);
+ assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
+#endif
+ _intrinsics->append(_intrinsics->at(len-1));
+ int pos;
+ for (pos = len-2; pos >= index; pos--) {
+ _intrinsics->at_put(pos+1,_intrinsics->at(pos));
+ }
+ _intrinsics->at_put(index, cg);
+ }
+ assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
+}
+
+CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
+ assert(m->is_loaded(), "don't try this on unloaded methods");
+ if (_intrinsics != NULL) {
+ int index = intrinsic_insertion_index(m, is_virtual);
+ if (index < _intrinsics->length()
+ && _intrinsics->at(index)->method() == m
+ && _intrinsics->at(index)->is_virtual() == is_virtual) {
+ return _intrinsics->at(index);
+ }
+ }
+ // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
+ if (m->intrinsic_id() != vmIntrinsics::_none) {
+ CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
+ if (cg != NULL) {
+ // Save it for next time:
+ register_intrinsic(cg);
+ return cg;
+ } else {
+ gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
+ }
+ }
+ return NULL;
+}
+
+// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
+// in library_call.cpp.
+
+
+#ifndef PRODUCT
+// statistics gathering...
+
+juint Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
+jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
+
+bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
+ assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
+ int oflags = _intrinsic_hist_flags[id];
+ assert(flags != 0, "what happened?");
+ if (is_virtual) {
+ flags |= _intrinsic_virtual;
+ }
+ bool changed = (flags != oflags);
+ if ((flags & _intrinsic_worked) != 0) {
+ juint count = (_intrinsic_hist_count[id] += 1);
+ if (count == 1) {
+ changed = true; // first time
+ }
+ // increment the overall count also:
+ _intrinsic_hist_count[vmIntrinsics::_none] += 1;
+ }
+ if (changed) {
+ if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
+ // Something changed about the intrinsic's virtuality.
+ if ((flags & _intrinsic_virtual) != 0) {
+ // This is the first use of this intrinsic as a virtual call.
+ if (oflags != 0) {
+ // We already saw it as a non-virtual, so note both cases.
+ flags |= _intrinsic_both;
+ }
+ } else if ((oflags & _intrinsic_both) == 0) {
+ // This is the first use of this intrinsic as a non-virtual
+ flags |= _intrinsic_both;
+ }
+ }
+ _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
+ }
+ // update the overall flags also:
+ _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
+ return changed;
+}
+
+static char* format_flags(int flags, char* buf) {
+ buf[0] = 0;
+ if ((flags & Compile::_intrinsic_worked) != 0) strcat(buf, ",worked");
+ if ((flags & Compile::_intrinsic_failed) != 0) strcat(buf, ",failed");
+ if ((flags & Compile::_intrinsic_disabled) != 0) strcat(buf, ",disabled");
+ if ((flags & Compile::_intrinsic_virtual) != 0) strcat(buf, ",virtual");
+ if ((flags & Compile::_intrinsic_both) != 0) strcat(buf, ",nonvirtual");
+ if (buf[0] == 0) strcat(buf, ",");
+ assert(buf[0] == ',', "must be");
+ return &buf[1];
+}
+
+void Compile::print_intrinsic_statistics() {
+ char flagsbuf[100];
+ ttyLocker ttyl;
+ if (xtty != NULL) xtty->head("statistics type='intrinsic'");
+ tty->print_cr("Compiler intrinsic usage:");
+ juint total = _intrinsic_hist_count[vmIntrinsics::_none];
+ if (total == 0) total = 1; // avoid div0 in case of no successes
+ #define PRINT_STAT_LINE(name, c, f) \
+ tty->print_cr(" %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
+ for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
+ vmIntrinsics::ID id = (vmIntrinsics::ID) index;
+ int flags = _intrinsic_hist_flags[id];
+ juint count = _intrinsic_hist_count[id];
+ if ((flags | count) != 0) {
+ PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
+ }
+ }
+ PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
+ if (xtty != NULL) xtty->tail("statistics");
+}
+
+void Compile::print_statistics() {
+ { ttyLocker ttyl;
+ if (xtty != NULL) xtty->head("statistics type='opto'");
+ Parse::print_statistics();
+ PhaseCCP::print_statistics();
+ PhaseRegAlloc::print_statistics();
+ Scheduling::print_statistics();
+ PhasePeephole::print_statistics();
+ PhaseIdealLoop::print_statistics();
+ if (xtty != NULL) xtty->tail("statistics");
+ }
+ if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
+ // put this under its own <statistics> element.
+ print_intrinsic_statistics();
+ }
+}
+#endif //PRODUCT
+
+// Support for bundling info
+Bundle* Compile::node_bundling(const Node *n) {
+ assert(valid_bundle_info(n), "oob");
+ return &_node_bundling_base[n->_idx];
+}
+
+bool Compile::valid_bundle_info(const Node *n) {
+ return (_node_bundling_limit > n->_idx);
+}
+
+
+// Identify all nodes that are reachable from below, useful.
+// Use a breadth-first pass that records state in a Unique_Node_List;
+// recursive traversal is slower.
+void Compile::identify_useful_nodes(Unique_Node_List &useful) {
+ int estimated_worklist_size = unique();
+ useful.map( estimated_worklist_size, NULL ); // preallocate space
+
+ // Initialize worklist
+ if (root() != NULL) { useful.push(root()); }
+ // If 'top' is cached, declare it useful to preserve cached node
+ if( cached_top_node() ) { useful.push(cached_top_node()); }
+
+  // Push all useful nodes onto the list, breadth-first
+ for( uint next = 0; next < useful.size(); ++next ) {
+ assert( next < unique(), "Unique useful nodes < total nodes");
+ Node *n = useful.at(next);
+ uint max = n->len();
+ for( uint i = 0; i < max; ++i ) {
+ Node *m = n->in(i);
+ if( m == NULL ) continue;
+ useful.push(m);
+ }
+ }
+}
+
+// Disconnect all useless nodes by disconnecting those at the boundary.
+void Compile::remove_useless_nodes(Unique_Node_List &useful) {
+ uint next = 0;
+ while( next < useful.size() ) {
+ Node *n = useful.at(next++);
+ // Use raw traversal of out edges since this code removes out edges
+ int max = n->outcnt();
+ for (int j = 0; j < max; ++j ) {
+ Node* child = n->raw_out(j);
+ if( ! useful.member(child) ) {
+ assert( !child->is_top() || child != top(),
+ "If top is cached in Compile object it is in useful list");
+ // Only need to remove this out-edge to the useless node
+ n->raw_del_out(j);
+ --j;
+ --max;
+ }
+ }
+ if (n->outcnt() == 1 && n->has_special_unique_user()) {
+ record_for_igvn( n->unique_out() );
+ }
+ }
+ debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
+}
+
+//------------------------------frame_size_in_words-----------------------------
+// frame_slots in units of words
+int Compile::frame_size_in_words() const {
+ // shift is 0 in LP32 and 1 in LP64
+ const int shift = (LogBytesPerWord - LogBytesPerInt);
+ int words = _frame_slots >> shift;
+ assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
+ return words;
+}
+
+// ============================================================================
+//------------------------------CompileWrapper---------------------------------
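+// RAII helper: the constructor registers this Compile with the active ciEnv and resets
+// per-compile state; the destructor clears the registration and frees the scratch buffer blob.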
+class CompileWrapper : public StackObj {
+ Compile *const _compile;
+ public:
+ CompileWrapper(Compile* compile);
+
+ ~CompileWrapper();
+};
+
+CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
+ // the Compile* pointer is stored in the current ciEnv:
+ ciEnv* env = compile->env();
+ assert(env == ciEnv::current(), "must already be a ciEnv active");
+ assert(env->compiler_data() == NULL, "compile already active?");
+ env->set_compiler_data(compile);
+ assert(compile == Compile::current(), "sanity");
+
+ compile->set_type_dict(NULL);
+ compile->set_type_hwm(NULL);
+ compile->set_type_last_size(0);
+ compile->set_last_tf(NULL, NULL);
+ compile->set_indexSet_arena(NULL);
+ compile->set_indexSet_free_block_list(NULL);
+ compile->init_type_arena();
+ Type::Initialize(compile);
+ _compile->set_scratch_buffer_blob(NULL);
+ _compile->begin_method();
+}
+CompileWrapper::~CompileWrapper() {
+ if (_compile->failing()) {
+ _compile->print_method("Failed");
+ }
+ _compile->end_method();
+ if (_compile->scratch_buffer_blob() != NULL)
+ BufferBlob::free(_compile->scratch_buffer_blob());
+ _compile->env()->set_compiler_data(NULL);
+}
+
+
+//----------------------------print_compile_messages---------------------------
+void Compile::print_compile_messages() {
+#ifndef PRODUCT
+ // Check if recompiling
+ if (_subsume_loads == false && PrintOpto) {
+ // Recompiling without allowing machine instructions to subsume loads
+ tty->print_cr("*********************************************************");
+ tty->print_cr("** Bailout: Recompile without subsuming loads **");
+ tty->print_cr("*********************************************************");
+ }
+ if (env()->break_at_compile()) {
+ // Open the debugger when compiling this method.
+ tty->print("### Breaking when compiling: ");
+ method()->print_short_name();
+ tty->cr();
+ BREAKPOINT;
+ }
+
+ if( PrintOpto ) {
+ if (is_osr_compilation()) {
+ tty->print("[OSR]%3d", _compile_id);
+ } else {
+ tty->print("%3d", _compile_id);
+ }
+ }
+#endif
+}
+
+
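+// Lazily create the BufferBlob used by scratch_emit_size(), reserving space at its end
+// for relocation records.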
+void Compile::init_scratch_buffer_blob() {
+ if( scratch_buffer_blob() != NULL ) return;
+
+ // Construct a temporary CodeBuffer to have it construct a BufferBlob
+ // Cache this BufferBlob for this compile.
+ ResourceMark rm;
+ int size = (MAX_inst_size + MAX_stubs_size + MAX_const_size);
+ BufferBlob* blob = BufferBlob::create("Compile::scratch_buffer", size);
+ // Record the buffer blob for next time.
+ set_scratch_buffer_blob(blob);
+ guarantee(scratch_buffer_blob() != NULL, "Need BufferBlob for code generation");
+
+ // Initialize the relocation buffers
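+ // The scratch relocation records occupy the last MAX_locs_size relocInfo slots of the blob.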
+ relocInfo* locs_buf = (relocInfo*) blob->instructions_end() - MAX_locs_size;
+ set_scratch_locs_memory(locs_buf);
+}
+
+
+//-----------------------scratch_emit_size-------------------------------------
+// Helper function that computes size by emitting code
+uint Compile::scratch_emit_size(const Node* n) {
+ // Emit into a trash buffer and count bytes emitted.
+ // This is a pretty expensive way to compute a size,
+ // but it works well enough if seldom used.
+ // All common fixed-size instructions are given a size
+ // method by the AD file.
+ // Note that the scratch buffer blob and locs memory are
+ // allocated at the beginning of the compile task, and
+ // may be shared by several calls to scratch_emit_size.
+ // The allocation of the scratch buffer blob is particularly
+ // expensive, since it has to grab the code cache lock.
+ BufferBlob* blob = this->scratch_buffer_blob();
+ assert(blob != NULL, "Initialize BufferBlob at start");
+ assert(blob->size() > MAX_inst_size, "sanity");
+ relocInfo* locs_buf = scratch_locs_memory();
+ address blob_begin = blob->instructions_begin();
+ address blob_end = (address)locs_buf;
+ assert(blob->instructions_contains(blob_end), "sanity");
+ CodeBuffer buf(blob_begin, blob_end - blob_begin);
+ buf.initialize_consts_size(MAX_const_size);
+ buf.initialize_stubs_size(MAX_stubs_size);
+ assert(locs_buf != NULL, "sanity");
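+ // Split the relocation space evenly between the instruction and stub sections.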
+ int lsize = MAX_locs_size / 2;
+ buf.insts()->initialize_shared_locs(&locs_buf[0], lsize);
+ buf.stubs()->initialize_shared_locs(&locs_buf[lsize], lsize);
+ n->emit(buf, this->regalloc());
+ return buf.code_size();
+}
+
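+// Forward the node to the connection graph, but only when escape analysis is enabled
+// (i.e. _congraph is non-NULL).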
+void Compile::record_for_escape_analysis(Node* n) {
+ if (_congraph != NULL)
+ _congraph->record_for_escape_analysis(n);
+}
+
+
+// ============================================================================
+//------------------------------Compile standard-------------------------------
+debug_only( int Compile::_debug_idx = 100000; )
+
+// Compile a method. entry_bci is -1 for normal compilations and indicates
+// the continuation bci for on-stack replacement.
+
+
+Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci, bool subsume_loads )
+ : Phase(Compiler),
+ _env(ci_env),
+ _log(ci_env->log()),
+ _compile_id(ci_env->compile_id()),
+ _save_argument_registers(false),
+ _stub_name(NULL),
+ _stub_function(NULL),
+ _stub_entry_point(NULL),
+ _method(target),
+ _entry_bci(osr_bci),
+ _initial_gvn(NULL),
+ _for_igvn(NULL),
+ _warm_calls(NULL),
+ _subsume_loads(subsume_loads),
+ _failure_reason(NULL),
+ _code_buffer("Compile::Fill_buffer"),
+ _orig_pc_slot(0),
+ _orig_pc_slot_offset_in_bytes(0),
+ _node_bundling_limit(0),
+ _node_bundling_base(NULL),
+#ifndef PRODUCT
+ _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
+ _printer(IdealGraphPrinter::printer()),
+#endif
+ _congraph(NULL) {
+ C = this;
+
+ CompileWrapper cw(this);
+#ifndef PRODUCT
+ if (TimeCompiler2) {
+ tty->print(" ");
+ target->holder()->name()->print();
+ tty->print(".");
+ target->print_short_name();
+ tty->print(" ");
+ }
+ TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
+ TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
+ set_print_assembly(PrintOptoAssembly || _method->should_print_assembly());
+#endif
+
+ if (ProfileTraps) {
+ // Make sure the method being compiled gets its own MDO,
+ // so we can at least track the decompile_count().
+ method()->build_method_data();
+ }
+
+ Init(::AliasLevel);
+
+
+ print_compile_messages();
+
+ if (UseOldInlining || PrintCompilation NOT_PRODUCT( || PrintOpto) )
+ _ilt = InlineTree::build_inline_tree_root();
+ else
+ _ilt = NULL;
+
+ // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
+ assert(num_alias_types() >= AliasIdxRaw, "");
+
+#define MINIMUM_NODE_HASH 1023
+ // Node list that Iterative GVN will start with
+ Unique_Node_List for_igvn(comp_arena());
+ set_for_igvn(&for_igvn);
+
+ // GVN that will be run immediately on new nodes
+ uint estimated_size = method()->code_size()*4+64;
+ estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
+ PhaseGVN gvn(node_arena(), estimated_size);
+ set_initial_gvn(&gvn);
+
+ if (DoEscapeAnalysis)
+ _congraph = new ConnectionGraph(this);
+
+ { // Scope for timing the parser
+ TracePhase t3("parse", &_t_parser, true);
+
+ // Put top into the hash table ASAP.
+ initial_gvn()->transform_no_reclaim(top());
+
+ // Set up tf(), start(), and find a CallGenerator.
+ CallGenerator* cg;
+ if (is_osr_compilation()) {
+ const TypeTuple *domain = StartOSRNode::osr_domain();
+ const TypeTuple *range = TypeTuple::make_range(method()->signature());
+ init_tf(TypeFunc::make(domain, range));
+ StartNode* s = new (this, 2) StartOSRNode(root(), domain);
+ initial_gvn()->set_type_bottom(s);
+ init_start(s);
+ cg = CallGenerator::for_osr(method(), entry_bci());
+ } else {
+ // Normal case.
+ init_tf(TypeFunc::make(method()));
+ StartNode* s = new (this, 2) StartNode(root(), tf()->domain());
+ initial_gvn()->set_type_bottom(s);
+ init_start(s);
+ float past_uses = method()->interpreter_invocation_count();
+ float expected_uses = past_uses;
+ cg = CallGenerator::for_inline(method(), expected_uses);
+ }
+ if (failing()) return;
+ if (cg == NULL) {
+ record_method_not_compilable_all_tiers("cannot parse method");
+ return;
+ }
+ JVMState* jvms = build_start_state(start(), tf());
+ if ((jvms = cg->generate(jvms)) == NULL) {
+ record_method_not_compilable("method parse failed");
+ return;
+ }
+ GraphKit kit(jvms);
+
+ if (!kit.stopped()) {
+ // Accept return values, and transfer control we know not where.
+ // This is done by a special, unique ReturnNode bound to root.
+ return_values(kit.jvms());
+ }
+
+ if (kit.has_exceptions()) {
+ // Any exceptions that escape from this call must be rethrown
+ // to whatever caller is dynamically above us on the stack.
+ // This is done by a special, unique RethrowNode bound to root.
+ rethrow_exceptions(kit.transfer_exceptions_into_jvms());
+ }
+
+ // Remove clutter produced by parsing.
+ if (!failing()) {
+ ResourceMark rm;
+ PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
+ }
+ }
+
+ // Note: Large methods are capped off in do_one_bytecode().
+ if (failing()) return;
+
+ // After parsing, node notes are no longer automagic.
+ // They must be propagated by register_new_node_with_optimizer(),
+ // clone(), or the like.
+ set_default_node_notes(NULL);
+
+ for (;;) {
+ int successes = Inline_Warm();
+ if (failing()) return;
+ if (successes == 0) break;
+ }
+
+ // Drain the list.
+ Finish_Warm();
+#ifndef PRODUCT
+ if (_printer) {
+ _printer->print_inlining(this);
+ }
+#endif
+
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ // Perform escape analysis
+ if (_congraph != NULL) {
+ NOT_PRODUCT( TracePhase t2("escapeAnalysis", &_t_escapeAnalysis, TimeCompiler); )
+ _congraph->compute_escape();
+#ifndef PRODUCT
+ if (PrintEscapeAnalysis) {
+ _congraph->dump();
+ }
+#endif
+ }
+ // Now optimize
+ Optimize();
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+#ifndef PRODUCT
+ if (PrintIdeal) {
+ ttyLocker ttyl; // keep the following output all in one block
+ // This output goes directly to the tty, not the compiler log.
+ // To enable tools to match it up with the compilation activity,
+ // be sure to tag this tty output with the compile ID.
+ if (xtty != NULL) {
+ xtty->head("ideal compile_id='%d'%s", compile_id(),
+ is_osr_compilation() ? " compile_kind='osr'" :
+ "");
+ }
+ root()->dump(9999);
+ if (xtty != NULL) {
+ xtty->tail("ideal");
+ }
+ }
+#endif
+
+ // Now that we know the size of all the monitors we can add a fixed slot
+ // for the original deopt pc.
+
+ _orig_pc_slot = fixed_slots();
+ int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
+ set_fixed_slots(next_slot);
+
+ // Now generate code
+ Code_Gen();
+ if (failing()) return;
+
+ // Check if we want to skip execution of all compiled code.
+ {
+#ifndef PRODUCT
+ if (OptoNoExecute) {
+ record_method_not_compilable("+OptoNoExecute"); // Flag as failed
+ return;
+ }
+ TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
+#endif
+
+ if (is_osr_compilation()) {
+ _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
+ _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
+ } else {
+ _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
+ _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
+ }
+
+ env()->register_method(_method, _entry_bci,
+ &_code_offsets,
+ _orig_pc_slot_offset_in_bytes,
+ code_buffer(),
+ frame_size_in_words(), _oop_map_set,
+ &_handler_table, &_inc_table,
+ compiler,
+ env()->comp_level(),
+ true, /*has_debug_info*/
+ has_unsafe_access()
+ );
+ }
+}
+
+//------------------------------Compile----------------------------------------
+// Compile a runtime stub
+Compile::Compile( ciEnv* ci_env,
+ TypeFunc_generator generator,
+ address stub_function,
+ const char *stub_name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool save_arg_registers,
+ bool return_pc )
+ : Phase(Compiler),
+ _env(ci_env),
+ _log(ci_env->log()),
+ _compile_id(-1),
+ _save_argument_registers(save_arg_registers),
+ _method(NULL),
+ _stub_name(stub_name),
+ _stub_function(stub_function),
+ _stub_entry_point(NULL),
+ _entry_bci(InvocationEntryBci),
+ _initial_gvn(NULL),
+ _for_igvn(NULL),
+ _warm_calls(NULL),
+ _orig_pc_slot(0),
+ _orig_pc_slot_offset_in_bytes(0),
+ _subsume_loads(true),
+ _failure_reason(NULL),
+ _code_buffer("Compile::Fill_buffer"),
+ _node_bundling_limit(0),
+ _node_bundling_base(NULL),
+#ifndef PRODUCT
+ _trace_opto_output(TraceOptoOutput),
+ _printer(NULL),
+#endif
+ _congraph(NULL) {
+ C = this;
+
+#ifndef PRODUCT
+ TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
+ TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
+ set_print_assembly(PrintFrameConverterAssembly);
+#endif
+ CompileWrapper cw(this);
+ Init(/*AliasLevel=*/ 0);
+ init_tf((*generator)());
+
+ {
+ // The following is a dummy for the sake of GraphKit::gen_stub
+ Unique_Node_List for_igvn(comp_arena());
+ set_for_igvn(&for_igvn); // not used, but some GraphKit guys push on this
+ PhaseGVN gvn(Thread::current()->resource_area(),255);
+ set_initial_gvn(&gvn); // not significant, but GraphKit guys use it pervasively
+ gvn.transform_no_reclaim(top());
+
+ GraphKit kit;
+ kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
+ }
+
+ NOT_PRODUCT( verify_graph_edges(); )
+ Code_Gen();
+ if (failing()) return;
+
+
+ // Entry point will be accessed using compile->stub_entry_point();
+ if (code_buffer() == NULL) {
+ Matcher::soft_match_failure();
+ } else {
+ if (PrintAssembly && (WizardMode || Verbose))
+ tty->print_cr("### Stub::%s", stub_name);
+
+ if (!failing()) {
+ assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
+
+ // Make the NMethod
+ // For now we mark the frame as never safe for profile stackwalking
+ RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
+ code_buffer(),
+ CodeOffsets::frame_never_safe,
+ // _code_offsets.value(CodeOffsets::Frame_Complete),
+ frame_size_in_words(),
+ _oop_map_set,
+ save_arg_registers);
+ assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
+
+ _stub_entry_point = rs->entry_point();
+ }
+ }
+}
+
+#ifndef PRODUCT
+void print_opto_verbose_signature( const TypeFunc *j_sig, const char *stub_name ) {
+ if(PrintOpto && Verbose) {
+ tty->print("%s ", stub_name); j_sig->print_flattened(); tty->cr();
+ }
+}
+#endif
+
+void Compile::print_codes() {
+}
+
+//------------------------------Init-------------------------------------------
+// Prepare for a single compilation
+void Compile::Init(int aliaslevel) {
+ _unique = 0;
+ _regalloc = NULL;
+
+ _tf = NULL; // filled in later
+ _top = NULL; // cached later
+ _matcher = NULL; // filled in later
+ _cfg = NULL; // filled in later
+
+ set_24_bit_selection_and_mode(Use24BitFP, false);
+
+ _node_note_array = NULL;
+ _default_node_notes = NULL;
+
+ _immutable_memory = NULL; // filled in at first inquiry
+
+ // Globally visible Nodes
+ // First set TOP to NULL to give safe behavior during creation of RootNode
+ set_cached_top_node(NULL);
+ set_root(new (this, 3) RootNode());
+ // Now that you have a Root to point to, create the real TOP
+ set_cached_top_node( new (this, 1) ConNode(Type::TOP) );
+ set_recent_alloc(NULL, NULL);
+
+ // Create Debug Information Recorder to record scopes, oopmaps, etc.
+ env()->set_oop_recorder(new OopRecorder(comp_arena()));
+ env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
+ env()->set_dependencies(new Dependencies(env()));
+
+ _fixed_slots = 0;
+ set_has_split_ifs(false);
+ set_has_loops(has_method() && method()->has_loops()); // first approximation
+ _deopt_happens = true; // start out assuming the worst
+ _trap_can_recompile = false; // no traps emitted yet
+ _major_progress = true; // start out assuming good things will happen
+ set_has_unsafe_access(false);
+ Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
+ set_decompile_count(0);
+
+ // Compilation level related initialization
+ if (env()->comp_level() == CompLevel_fast_compile) {
+ set_num_loop_opts(Tier1LoopOptsCount);
+ set_do_inlining(Tier1Inline != 0);
+ set_max_inline_size(Tier1MaxInlineSize);
+ set_freq_inline_size(Tier1FreqInlineSize);
+ set_do_scheduling(false);
+ set_do_count_invocations(Tier1CountInvocations);
+ set_do_method_data_update(Tier1UpdateMethodData);
+ } else {
+ assert(env()->comp_level() == CompLevel_full_optimization, "unknown comp level");
+ set_num_loop_opts(LoopOptsCount);
+ set_do_inlining(Inline);
+ set_max_inline_size(MaxInlineSize);
+ set_freq_inline_size(FreqInlineSize);
+ set_do_scheduling(OptoScheduling);
+ set_do_count_invocations(false);
+ set_do_method_data_update(false);
+ }
+
+ if (debug_info()->recording_non_safepoints()) {
+ set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
+ (comp_arena(), 8, 0, NULL));
+ set_default_node_notes(Node_Notes::make(this));
+ }
+
+ // // -- Initialize types before each compile --
+ // // Update cached type information
+ // if( _method && _method->constants() )
+ // Type::update_loaded_types(_method, _method->constants());
+
+ // Init alias_type map.
+ if (!DoEscapeAnalysis && aliaslevel == 3)
+ aliaslevel = 2; // No unique types without escape analysis
+ _AliasLevel = aliaslevel;
+ const int grow_ats = 16;
+ _max_alias_types = grow_ats;
+ _alias_types = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
+ AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, grow_ats);
+ Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
+ {
+ for (int i = 0; i < grow_ats; i++) _alias_types[i] = &ats[i];
+ }
+ // Initialize the first few types.
+ _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
+ _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
+ _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
+ _num_alias_types = AliasIdxRaw+1;
+ // Zero out the alias type cache.
+ Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
+ // A NULL adr_type hits in the cache right away. Preload the right answer.
+ probe_alias_cache(NULL)->_index = AliasIdxTop;
+
+ _intrinsics = NULL;
+ _macro_nodes = new GrowableArray<Node*>(comp_arena(), 8, 0, NULL);
+ register_library_intrinsics();
+}
+
+//---------------------------init_start----------------------------------------
+// Install the StartNode on this compile object.
+void Compile::init_start(StartNode* s) {
+ if (failing())
+ return; // already failing
+ assert(s == start(), "");
+}
+
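+// The StartNode is not cached on the Compile object; recover it by scanning the outputs of root().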
+StartNode* Compile::start() const {
+ assert(!failing(), "");
+ for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
+ Node* start = root()->fast_out(i);
+ if( start->is_Start() )
+ return start->as_Start();
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//-------------------------------immutable_memory-------------------------------------
+// Access immutable memory
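+// This is the Memory projection hanging off the StartNode; it is found on first request
+// and then cached in _immutable_memory.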
+Node* Compile::immutable_memory() {
+ if (_immutable_memory != NULL) {
+ return _immutable_memory;
+ }
+ StartNode* s = start();
+ for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
+ Node *p = s->fast_out(i);
+ if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
+ _immutable_memory = p;
+ return _immutable_memory;
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//----------------------set_cached_top_node------------------------------------
+// Install the cached top node, and make sure Node::is_top works correctly.
+void Compile::set_cached_top_node(Node* tn) {
+ if (tn != NULL) verify_top(tn);
+ Node* old_top = _top;
+ _top = tn;
+ // Calling Node::setup_is_top allows the nodes the chance to adjust
+ // their _out arrays.
+ if (_top != NULL) _top->setup_is_top();
+ if (old_top != NULL) old_top->setup_is_top();
+ assert(_top == NULL || top()->is_top(), "");
+}
+
+#ifndef PRODUCT
+void Compile::verify_top(Node* tn) const {
+ if (tn != NULL) {
+ assert(tn->is_Con(), "top node must be a constant");
+ assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
+ assert(tn->in(0) != NULL, "must have live top node");
+ }
+}
+#endif
+
+
+///-------------------Managing Per-Node Debug & Profile Info-------------------
+
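+// Append zero-filled blocks of Node_Notes to 'arr', growing it by at least 'grow_by' blocks
+// (and at least doubling its current length).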
+void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
+ guarantee(arr != NULL, "");
+ int num_blocks = arr->length();
+ if (grow_by < num_blocks) grow_by = num_blocks;
+ int num_notes = grow_by * _node_notes_block_size;
+ Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
+ Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
+ while (num_notes > 0) {
+ arr->append(notes);
+ notes += _node_notes_block_size;
+ num_notes -= _node_notes_block_size;
+ }
+ assert(num_notes == 0, "exact multiple, please");
+}
+
+bool Compile::copy_node_notes_to(Node* dest, Node* source) {
+ if (source == NULL || dest == NULL) return false;
+
+ if (dest->is_Con())
+ return false; // Do not push debug info onto constants.
+
+#ifdef ASSERT
+ // Leave a bread crumb trail pointing to the original node:
+ if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
+ dest->set_debug_orig(source);
+ }
+#endif
+
+ if (node_note_array() == NULL)
+ return false; // Not collecting any notes now.
+
+ // This is a copy onto a pre-existing node, which may already have notes.
+ // If both nodes have notes, do not overwrite any pre-existing notes.
+ Node_Notes* source_notes = node_notes_at(source->_idx);
+ if (source_notes == NULL || source_notes->is_clear()) return false;
+ Node_Notes* dest_notes = node_notes_at(dest->_idx);
+ if (dest_notes == NULL || dest_notes->is_clear()) {
+ return set_node_notes_at(dest->_idx, source_notes);
+ }
+
+ Node_Notes merged_notes = (*source_notes);
+ // The order of operations here ensures that dest notes will win...
+ merged_notes.update_from(dest_notes);
+ return set_node_notes_at(dest->_idx, &merged_notes);
+}
+
+
+//--------------------------allow_range_check_smearing-------------------------
+// Gating condition for coalescing similar range checks.
+// Sometimes we try 'speculatively' replacing a series of range checks with a
+// single covering check that is at least as strong as any of them.
+// If the optimization succeeds, the simplified (strengthened) range check
+// will always succeed. If it fails, we will deopt, and then give up
+// on the optimization.
+bool Compile::allow_range_check_smearing() const {
+ // If this method has already thrown a range-check,
+ // assume it was because we already tried range smearing
+ // and it failed.
+ uint already_trapped = trap_count(Deoptimization::Reason_range_check);
+ return !already_trapped;
+}
+
+
+//------------------------------flatten_alias_type-----------------------------
+const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
+ int offset = tj->offset();
+ TypePtr::PTR ptr = tj->ptr();
+
+ // Process weird unsafe references.
+ if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
+ assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
+ tj = TypeOopPtr::BOTTOM;
+ ptr = tj->ptr();
+ offset = tj->offset();
+ }
+
+ // Array pointers need some flattening
+ const TypeAryPtr *ta = tj->isa_aryptr();
+ if( ta && _AliasLevel >= 2 ) {
+ // For arrays indexed by constant indices, we flatten the alias
+ // space to include all of the array body. Only the header, klass
+ // and array length can be accessed un-aliased.
+ if( offset != Type::OffsetBot ) {
+ if( ta->const_oop() ) { // methodDataOop or methodOop
+ offset = Type::OffsetBot; // Flatten constant access into array body
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+ } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
+ // range is OK as-is.
+ tj = ta = TypeAryPtr::RANGE;
+ } else if( offset == oopDesc::klass_offset_in_bytes() ) {
+ tj = TypeInstPtr::KLASS; // all klass loads look alike
+ ta = TypeAryPtr::RANGE; // generic ignored junk
+ ptr = TypePtr::BotPTR;
+ } else if( offset == oopDesc::mark_offset_in_bytes() ) {
+ tj = TypeInstPtr::MARK;
+ ta = TypeAryPtr::RANGE; // generic ignored junk
+ ptr = TypePtr::BotPTR;
+ } else { // Random constant offset into array body
+ offset = Type::OffsetBot; // Flatten constant access into array body
+ tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,Type::OffsetBot, ta->instance_id());
+ }
+ }
+ // Arrays of fixed size alias with arrays of unknown size.
+ if (ta->size() != TypeInt::POS) {
+ const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset, ta->instance_id());
+ }
+ // Arrays of known objects become arrays of unknown objects.
+ if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
+ const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset, ta->instance_id());
+ }
+ // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
+ // cannot be distinguished by bytecode alone.
+ if (ta->elem() == TypeInt::BOOL) {
+ const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
+ ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
+ tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset, ta->instance_id());
+ }
+ // During the 2nd round of IterGVN, NotNull castings are removed.
+ // Make sure the Bottom and NotNull variants alias the same.
+ // Also, make sure exact and non-exact variants alias the same.
+ if( ptr == TypePtr::NotNull || ta->klass_is_exact() ) {
+ if (ta->const_oop()) {
+ tj = ta = TypeAryPtr::make(TypePtr::Constant,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
+ } else {
+ tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
+ }
+ }
+ }
+
+ // Oop pointers need some flattening
+ const TypeInstPtr *to = tj->isa_instptr();
+ if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
+ if( ptr == TypePtr::Constant ) {
+ // No constant oop pointers (such as Strings); they alias with
+ // unknown strings.
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
+ } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
+ // During the 2nd round of IterGVN, NotNull castings are removed.
+ // Make sure the Bottom and NotNull variants alias the same.
+ // Also, make sure exact and non-exact variants alias the same.
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset, to->instance_id());
+ }
+ // Canonicalize the holder of this field
+ ciInstanceKlass *k = to->klass()->as_instance_klass();
+ if (offset >= 0 && offset < oopDesc::header_size() * wordSize) {
+ // First handle header references such as a LoadKlassNode, even if the
+ // object's klass is unloaded at compile time (4965979).
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset, to->instance_id());
+ } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
+ to = NULL;
+ tj = TypeOopPtr::BOTTOM;
+ offset = tj->offset();
+ } else {
+ ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
+ if (!k->equals(canonical_holder) || tj->offset() != offset) {
+ tj = to = TypeInstPtr::make(TypePtr::BotPTR, canonical_holder, false, NULL, offset, to->instance_id());
+ }
+ }
+ }
+
+ // Klass pointers to object array klasses need some flattening
+ const TypeKlassPtr *tk = tj->isa_klassptr();
+ if( tk ) {
+ // If we are referencing a field within a Klass, we need
+ // to assume the worst case of an Object. Both exact and
+ // inexact types must flatten to the same alias class.
+ // Since the flattened result for a klass is defined to be
+ // precisely java.lang.Object, use a constant ptr.
+ if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
+
+ tj = tk = TypeKlassPtr::make(TypePtr::Constant,
+ TypeKlassPtr::OBJECT->klass(),
+ offset);
+ }
+
+ ciKlass* klass = tk->klass();
+ if( klass->is_obj_array_klass() ) {
+ ciKlass* k = TypeAryPtr::OOPS->klass();
+ if( !k || !k->is_loaded() ) // Only fails for some -Xcomp runs
+ k = TypeInstPtr::BOTTOM->klass();
+ tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
+ }
+
+ // Check for precise loads from the primary supertype array and force them
+ // to the supertype cache alias index. Check for generic array loads from
+ // the primary supertype array and also force them to the supertype cache
+ // alias index. Since the same load can reach both, we need to merge
+ // these 2 disparate memories into the same alias class. Since the
+ // primary supertype array is read-only, there's no chance of confusion
+ // where we bypass an array load and an array store.
+ uint off2 = offset - Klass::primary_supers_offset_in_bytes();
+ if( offset == Type::OffsetBot ||
+ off2 < Klass::primary_super_limit()*wordSize ) {
+ offset = sizeof(oopDesc) +Klass::secondary_super_cache_offset_in_bytes();
+ tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
+ }
+ }
+
+ // Flatten all Raw pointers together.
+ if (tj->base() == Type::RawPtr)
+ tj = TypeRawPtr::BOTTOM;
+
+ if (tj->base() == Type::AnyPtr)
+ tj = TypePtr::BOTTOM; // An error, which the caller must check for.
+
+ // Flatten all to bottom for now
+ switch( _AliasLevel ) {
+ case 0:
+ tj = TypePtr::BOTTOM;
+ break;
+ case 1: // Flatten to: oop, static, field or array
+ switch (tj->base()) {
+ //case Type::AryPtr: tj = TypeAryPtr::RANGE; break;
+ case Type::RawPtr: tj = TypeRawPtr::BOTTOM; break;
+ case Type::AryPtr: // do not distinguish arrays at all
+ case Type::InstPtr: tj = TypeInstPtr::BOTTOM; break;
+ case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
+ case Type::AnyPtr: tj = TypePtr::BOTTOM; break; // caller checks it
+ default: ShouldNotReachHere();
+ }
+ break;
+ case 2: // No collapsing at level 2; keep all splits
+ case 3: // No collapsing at level 3; keep all splits
+ break;
+ default:
+ Unimplemented();
+ }
+
+ offset = tj->offset();
+ assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
+
+ assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
+ (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
+ (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
+ (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
+ (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
+ (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
+ (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr) ,
+ "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
+ assert( tj->ptr() != TypePtr::TopPTR &&
+ tj->ptr() != TypePtr::AnyNull &&
+ tj->ptr() != TypePtr::Null, "No imprecise addresses" );
+// assert( tj->ptr() != TypePtr::Constant ||
+// tj->base() == Type::RawPtr ||
+// tj->base() == Type::KlassPtr, "No constant oop addresses" );
+
+ return tj;
+}
+
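+// Initialize one AliasType entry; for instance oop types, also record the alias index of
+// the corresponding general (non-instance) type in _general_index.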
+void Compile::AliasType::Init(int i, const TypePtr* at) {
+ _index = i;
+ _adr_type = at;
+ _field = NULL;
+ _is_rewritable = true; // default
+ const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
+ if (atoop != NULL && atoop->is_instance()) {
+ const TypeOopPtr *gt = atoop->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ _general_index = Compile::current()->get_alias_index(gt);
+ } else {
+ _general_index = 0;
+ }
+}
+
+//---------------------------------print_on------------------------------------
+#ifndef PRODUCT
+void Compile::AliasType::print_on(outputStream* st) {
+ if (index() < 10)
+ st->print("@ <%d> ", index());
+ else st->print("@ <%d>", index());
+ st->print(is_rewritable() ? " " : " RO");
+ int offset = adr_type()->offset();
+ if (offset == Type::OffsetBot)
+ st->print(" +any");
+ else st->print(" +%-3d", offset);
+ st->print(" in ");
+ adr_type()->dump_on(st);
+ const TypeOopPtr* tjp = adr_type()->isa_oopptr();
+ if (field() != NULL && tjp) {
+ if (tjp->klass() != field()->holder() ||
+ tjp->offset() != field()->offset_in_bytes()) {
+ st->print(" != ");
+ field()->print();
+ st->print(" ***");
+ }
+ }
+}
+
+void print_alias_types() {
+ Compile* C = Compile::current();
+ tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
+ for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
+ C->alias_type(idx)->print_on(tty);
+ tty->cr();
+ }
+}
+#endif
+
+
+//----------------------------probe_alias_cache--------------------------------
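+// Hash the address-type pointer into the fixed-size, direct-mapped alias cache.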
+Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
+ intptr_t key = (intptr_t) adr_type;
+ key ^= key >> logAliasCacheSize;
+ return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
+}
+
+
+//-----------------------------grow_alias_types--------------------------------
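+// Double the capacity of the alias type table, zero-filling the newly added entries.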
+void Compile::grow_alias_types() {
+ const int old_ats = _max_alias_types; // how many before?
+ const int new_ats = old_ats; // how many more?
+ const int grow_ats = old_ats+new_ats; // how many now?
+ _max_alias_types = grow_ats;
+ _alias_types = REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
+ AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
+ Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
+ for (int i = 0; i < new_ats; i++) _alias_types[old_ats+i] = &ats[i];
+}
+
+
+//--------------------------------find_alias_type------------------------------
+Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create) {
+ if (_AliasLevel == 0)
+ return alias_type(AliasIdxBot);
+
+ AliasCacheEntry* ace = probe_alias_cache(adr_type);
+ if (ace->_adr_type == adr_type) {
+ return alias_type(ace->_index);
+ }
+
+ // Handle special cases.
+ if (adr_type == NULL) return alias_type(AliasIdxTop);
+ if (adr_type == TypePtr::BOTTOM) return alias_type(AliasIdxBot);
+
+ // Do it the slow way.
+ const TypePtr* flat = flatten_alias_type(adr_type);
+
+#ifdef ASSERT
+ assert(flat == flatten_alias_type(flat), "idempotent");
+ assert(flat != TypePtr::BOTTOM, "cannot alias-analyze an untyped ptr");
+ if (flat->isa_oopptr() && !flat->isa_klassptr()) {
+ const TypeOopPtr* foop = flat->is_oopptr();
+ const TypePtr* xoop = foop->cast_to_exactness(!foop->klass_is_exact())->is_ptr();
+ assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
+ }
+ assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
+#endif
+
+ int idx = AliasIdxTop;
+ for (int i = 0; i < num_alias_types(); i++) {
+ if (alias_type(i)->adr_type() == flat) {
+ idx = i;
+ break;
+ }
+ }
+
+ if (idx == AliasIdxTop) {
+ if (no_create) return NULL;
+ // Grow the array if necessary.
+ if (_num_alias_types == _max_alias_types) grow_alias_types();
+ // Add a new alias type.
+ idx = _num_alias_types++;
+ _alias_types[idx]->Init(idx, flat);
+ if (flat == TypeInstPtr::KLASS) alias_type(idx)->set_rewritable(false);
+ if (flat == TypeAryPtr::RANGE) alias_type(idx)->set_rewritable(false);
+ if (flat->isa_instptr()) {
+ if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
+ && flat->is_instptr()->klass() == env()->Class_klass())
+ alias_type(idx)->set_rewritable(false);
+ }
+ if (flat->isa_klassptr()) {
+ if (flat->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ if (flat->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc))
+ alias_type(idx)->set_rewritable(false);
+ }
+ // %%% (We would like to finalize JavaThread::threadObj_offset(),
+ // but the base pointer type is not distinctive enough to identify
+ // references into JavaThread.)
+
+ // Check for final instance fields.
+ const TypeInstPtr* tinst = flat->isa_instptr();
+ if (tinst && tinst->offset() >= oopDesc::header_size() * wordSize) {
+ ciInstanceKlass *k = tinst->klass()->as_instance_klass();
+ ciField* field = k->get_field_by_offset(tinst->offset(), false);
+ // Set field() and is_rewritable() attributes.
+ if (field != NULL) alias_type(idx)->set_field(field);
+ }
+ const TypeKlassPtr* tklass = flat->isa_klassptr();
+ // Check for final static fields.
+ if (tklass && tklass->klass()->is_instance_klass()) {
+ ciInstanceKlass *k = tklass->klass()->as_instance_klass();
+ ciField* field = k->get_field_by_offset(tklass->offset(), true);
+ // Set field() and is_rewritable() attributes.
+ if (field != NULL) alias_type(idx)->set_field(field);
+ }
+ }
+
+ // Fill the cache for next time.
+ ace->_adr_type = adr_type;
+ ace->_index = idx;
+ assert(alias_type(adr_type) == alias_type(idx), "type must be installed");
+
+ // Might as well try to fill the cache for the flattened version, too.
+ AliasCacheEntry* face = probe_alias_cache(flat);
+ if (face->_adr_type == NULL) {
+ face->_adr_type = flat;
+ face->_index = idx;
+ assert(alias_type(flat) == alias_type(idx), "flat type must work too");
+ }
+
+ return alias_type(idx);
+}
+
+
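+// Alias type for a field: static fields are addressed off the holder's klass pointer,
+// instance fields off an oop of the holder's type, at the field's byte offset.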
+Compile::AliasType* Compile::alias_type(ciField* field) {
+ const TypeOopPtr* t;
+ if (field->is_static())
+ t = TypeKlassPtr::make(field->holder());
+ else
+ t = TypeOopPtr::make_from_klass_raw(field->holder());
+ AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()));
+ assert(field->is_final() == !atp->is_rewritable(), "must get the rewritable bits correct");
+ return atp;
+}
+
+
+//------------------------------have_alias_type--------------------------------
+bool Compile::have_alias_type(const TypePtr* adr_type) {
+ AliasCacheEntry* ace = probe_alias_cache(adr_type);
+ if (ace->_adr_type == adr_type) {
+ return true;
+ }
+
+ // Handle special cases.
+ if (adr_type == NULL) return true;
+ if (adr_type == TypePtr::BOTTOM) return true;
+
+ return find_alias_type(adr_type, true) != NULL;
+}
+
+//-----------------------------must_alias--------------------------------------
+// True if all values of the given address type are in the given alias category.
+bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
+ if (alias_idx == AliasIdxBot) return true; // the universal category
+ if (adr_type == NULL) return true; // NULL serves as TypePtr::TOP
+ if (alias_idx == AliasIdxTop) return false; // the empty category
+ if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
+
+ // the only remaining possible overlap is identity
+ int adr_idx = get_alias_index(adr_type);
+ assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
+ assert(adr_idx == alias_idx ||
+ (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
+ && adr_type != TypeOopPtr::BOTTOM),
+ "should not be testing for overlap with an unsafe pointer");
+ return adr_idx == alias_idx;
+}
+
+//------------------------------can_alias--------------------------------------
+// True if any values of the given address type are in the given alias category.
+bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
+ if (alias_idx == AliasIdxTop) return false; // the empty category
+ if (adr_type == NULL) return false; // NULL serves as TypePtr::TOP
+ if (alias_idx == AliasIdxBot) return true; // the universal category
+ if (adr_type->base() == Type::AnyPtr) return true; // TypePtr::BOTTOM or its twins
+
+ // the only remaining possible overlap is identity
+ int adr_idx = get_alias_index(adr_type);
+ assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
+ return adr_idx == alias_idx;
+}
+
+
+
+//---------------------------pop_warm_call-------------------------------------
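+// Remove and return the head of the warm-call list, or NULL if the list is empty.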
+WarmCallInfo* Compile::pop_warm_call() {
+ WarmCallInfo* wci = _warm_calls;
+ if (wci != NULL) _warm_calls = wci->remove_from(wci);
+ return wci;
+}
+
+//----------------------------Inline_Warm--------------------------------------
+int Compile::Inline_Warm() {
+ // If there is room, try to inline some more warm call sites.
+ // %%% Do a graph index compaction pass when we think we're out of space?
+ if (!InlineWarmCalls) return 0;
+
+ int calls_made_hot = 0;
+ int room_to_grow = NodeCountInliningCutoff - unique();
+ int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
+ int amount_grown = 0;
+ WarmCallInfo* call;
+ while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
+ int est_size = (int)call->size();
+ if (est_size > (room_to_grow - amount_grown)) {
+ // This one won't fit anyway. Get rid of it.
+ call->make_cold();
+ continue;
+ }
+ call->make_hot();
+ calls_made_hot++;
+ amount_grown += est_size;
+ amount_to_grow -= est_size;
+ }
+
+ if (calls_made_hot > 0) set_major_progress();
+ return calls_made_hot;
+}
+
+
+//----------------------------Finish_Warm--------------------------------------
+void Compile::Finish_Warm() {
+ if (!InlineWarmCalls) return;
+ if (failing()) return;
+ if (warm_calls() == NULL) return;
+
+ // Clean up loose ends, if we are out of space for inlining.
+ WarmCallInfo* call;
+ while ((call = pop_warm_call()) != NULL) {
+ call->make_cold();
+ }
+}
+
+
+//------------------------------Optimize---------------------------------------
+// Given a graph, optimize it.
+void Compile::Optimize() {
+ TracePhase t1("optimizer", &_t_optimizer, true);
+
+#ifndef PRODUCT
+ if (env()->break_at_compile()) {
+ BREAKPOINT;
+ }
+
+#endif
+
+ ResourceMark rm;
+ int loop_opts_cnt;
+
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ print_method("Start");
+
+ {
+ // Iterative Global Value Numbering, including ideal transforms
+ // Initialize IterGVN with types and values from parse-time GVN
+ PhaseIterGVN igvn(initial_gvn());
+ {
+ NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
+ igvn.optimize();
+ }
+
+ print_method("Iter GVN 1", 2);
+
+ if (failing()) return;
+
+ // Get rid of the connection graph since its information is not
+ // updated by optimizations
+ _congraph = NULL;
+
+
+ // Loop transforms on the ideal graph. Range Check Elimination,
+ // peeling, unrolling, etc.
+
+ // Set loop opts counter
+ loop_opts_cnt = num_loop_opts();
+ if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
+ {
+ TracePhase t2("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, true );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 1", 2);
+ if (failing()) return;
+ }
+ // Loop opts pass if partial peeling occurred in previous pass
+ if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t3("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, false );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 2", 2);
+ if (failing()) return;
+ }
+ // Loop opts pass for loop-unrolling before CCP
+ if(major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t4("idealLoop", &_t_idealLoop, true);
+ PhaseIdealLoop ideal_loop( igvn, NULL, false );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop 3", 2);
+ }
+ }
+ if (failing()) return;
+
+ // Conditional Constant Propagation
+ PhaseCCP ccp( &igvn );
+ assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
+ {
+ TracePhase t2("ccp", &_t_ccp, true);
+ ccp.do_transform();
+ }
+ print_method("PhaseCPP 1", 2);
+
+ assert( true, "Break here to ccp.dump_old2new_map()");
+
+ // Iterative Global Value Numbering, including ideal transforms
+ {
+ NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
+ igvn = ccp;
+ igvn.optimize();
+ }
+
+ print_method("Iter GVN 2", 2);
+
+ if (failing()) return;
+
+ // Loop transforms on the ideal graph. Range Check Elimination,
+ // peeling, unrolling, etc.
+ if(loop_opts_cnt > 0) {
+ debug_only( int cnt = 0; );
+ while(major_progress() && (loop_opts_cnt > 0)) {
+ TracePhase t2("idealLoop", &_t_idealLoop, true);
+ assert( cnt++ < 40, "infinite cycle in loop optimization" );
+ PhaseIdealLoop ideal_loop( igvn, NULL, true );
+ loop_opts_cnt--;
+ if (major_progress()) print_method("PhaseIdealLoop iterations", 2);
+ if (failing()) return;
+ }
+ }
+ {
+ NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
+ PhaseMacroExpand mex(igvn);
+ if (mex.expand_macro_nodes()) {
+ assert(failing(), "must bail out w/ explicit message");
+ return;
+ }
+ }
+
+ } // (End scope of igvn; run destructor if necessary for asserts.)
+
+ // A method with only infinite loops has no edges entering loops from root
+ {
+ NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
+ if (final_graph_reshaping()) {
+ assert(failing(), "must bail out w/ explicit message");
+ return;
+ }
+ }
+
+ print_method("Optimize finished", 2);
+}
+
+
+//------------------------------Code_Gen---------------------------------------
+// Given a graph, generate code for it
+void Compile::Code_Gen() {
+ if (failing()) return;
+
+ // Perform instruction selection. You might think we could reclaim Matcher
+ // memory PDQ, but actually the Matcher is used in generating spill code.
+ // Internals of the Matcher (including some VectorSets) must remain live
+ // for a while - thus I cannot reclaim Matcher memory lest a VectorSet usage
+ // set a bit in reclaimed memory.
+
+ // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
+ // nodes. Mapping is only valid at the root of each matched subtree.
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ Node_List proj_list;
+ Matcher m(proj_list);
+ _matcher = &m;
+ {
+ TracePhase t2("matcher", &_t_matcher, true);
+ m.match();
+ }
+ // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
+ // nodes. Mapping is only valid at the root of each matched subtree.
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ // If you have too many nodes, or if matching has failed, bail out
+ check_node_count(0, "out of nodes matching instructions");
+ if (failing()) return;
+
+ // Build a proper-looking CFG
+ PhaseCFG cfg(node_arena(), root(), m);
+ _cfg = &cfg;
+ {
+ NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
+ cfg.Dominators();
+ if (failing()) return;
+
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ cfg.Estimate_Block_Frequency();
+ cfg.GlobalCodeMotion(m,unique(),proj_list);
+
+ print_method("Global code motion", 2);
+
+ if (failing()) return;
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ debug_only( cfg.verify(); )
+ }
+ NOT_PRODUCT( verify_graph_edges(); )
+
+ PhaseChaitin regalloc(unique(),cfg,m);
+ _regalloc = &regalloc;
+ {
+ TracePhase t2("regalloc", &_t_registerAllocation, true);
+ // Perform any platform dependent preallocation actions. This is used,
+ // for example, to avoid taking an implicit null pointer exception
+ // using the frame pointer on win95.
+ _regalloc->pd_preallocate_hook();
+
+ // Perform register allocation. After Chaitin, use-def chains are
+ // no longer accurate (at spill code) and so must be ignored.
+ // Node->LRG->reg mappings are still accurate.
+ _regalloc->Register_Allocate();
+
+ // Bail out if the allocator builds too many nodes
+ if (failing()) return;
+ }
+
+ // Prior to register allocation we kept empty basic blocks in case the
+ // allocator needed a place to spill. After register allocation we
+ // are not adding any new instructions. If any basic block is empty, we
+ // can now safely remove it.
+ {
+ NOT_PRODUCT( TracePhase t2("removeEmpty", &_t_removeEmptyBlocks, TimeCompiler); )
+ cfg.RemoveEmpty();
+ }
+
+ // Perform any platform dependent postallocation verifications.
+ debug_only( _regalloc->pd_postallocate_verify_hook(); )
+
+ // Apply peephole optimizations
+ if( OptoPeephole ) {
+ NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
+ PhasePeephole peep( _regalloc, cfg);
+ peep.do_transform();
+ }
+
+ // Convert Nodes to instruction bits in a buffer
+ {
+ // %%%% workspace merge brought two timers together for one job
+ TracePhase t2a("output", &_t_output, true);
+ NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
+ Output();
+ }
+
+ print_method("End");
+
+ // He's dead, Jim.
+ _cfg = (PhaseCFG*)0xdeadbeef;
+ _regalloc = (PhaseChaitin*)0xdeadbeef;
+}
+
+
+//------------------------------dump_asm---------------------------------------
+// Dump formatted assembly
+#ifndef PRODUCT
+void Compile::dump_asm(int *pcs, uint pc_limit) {
+ bool cut_short = false;
+ tty->print_cr("#");
+ tty->print("# "); _tf->dump(); tty->cr();
+ tty->print_cr("#");
+
+ // For all blocks
+ int pc = 0x0; // Program counter
+ char starts_bundle = ' ';
+ _regalloc->dump_frame();
+
+ Node *n = NULL;
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ if (VMThread::should_terminate()) { cut_short = true; break; }
+ Block *b = _cfg->_blocks[i];
+ if (b->is_connector() && !Verbose) continue;
+ n = b->_nodes[0];
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x ", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ b->dump_head( &_cfg->_bbs );
+ if (b->is_connector()) {
+ tty->print_cr(" # Empty connector block");
+ } else if (b->num_preds() == 2 && b->pred(1)->is_CatchProj() && b->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
+ tty->print_cr(" # Block is sole successor of call");
+ }
+
+ // For all instructions
+ Node *delay = NULL;
+ for( uint j = 0; j<b->_nodes.size(); j++ ) {
+ if (VMThread::should_terminate()) { cut_short = true; break; }
+ n = b->_nodes[j];
+ if (valid_bundle_info(n)) {
+ Bundle *bundle = node_bundling(n);
+ if (bundle->used_in_unconditional_delay()) {
+ delay = n;
+ continue;
+ }
+ if (bundle->starts_bundle())
+ starts_bundle = '+';
+ }
+
+ if( !n->is_Region() && // Dont print in the Assembly
+ !n->is_Phi() && // a few noisely useless nodes
+ !n->is_Proj() &&
+ !n->is_MachTemp() &&
+ !n->is_Catch() && // Would be nice to print exception table targets
+ !n->is_MergeMem() && // Not very interesting
+ !n->is_top() && // Debug info table constants
+ !(n->is_Con() && !n->is_Mach())// Debug info table constants
+ ) {
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ tty->print(" %c ", starts_bundle);
+ starts_bundle = ' ';
+ tty->print("\t");
+ n->format(_regalloc, tty);
+ tty->cr();
+ }
+
+ // If we have an instruction with a delay slot, and have seen a delay,
+ // then back up and print it
+ if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
+ assert(delay != NULL, "no unconditional delay instruction");
+ if (node_bundling(delay)->starts_bundle())
+ starts_bundle = '+';
+ if (pcs && n->_idx < pc_limit)
+ tty->print("%3.3x", pcs[n->_idx]);
+ else
+ tty->print(" ");
+ tty->print(" %c ", starts_bundle);
+ starts_bundle = ' ';
+ tty->print("\t");
+ delay->format(_regalloc, tty);
+ tty->print_cr("");
+ delay = NULL;
+ }
+
+ // Dump the exception table as well
+ if( n->is_Catch() && (Verbose || WizardMode) ) {
+ // Print the exception table for this offset
+ _handler_table.print_subtable_for(pc);
+ }
+ }
+
+ if (pcs && n->_idx < pc_limit)
+ tty->print_cr("%3.3x", pcs[n->_idx]);
+ else
+ tty->print_cr("");
+
+ assert(cut_short || delay == NULL, "no unconditional delay branch");
+
+ } // End of per-block dump
+ tty->print_cr("");
+
+ if (cut_short) tty->print_cr("*** disassembly is cut short ***");
+}
+#endif
+
+//------------------------------Final_Reshape_Counts---------------------------
+// This class defines counters to help identify when a method
+// may/must be executed using hardware with only 24-bit precision.
+struct Final_Reshape_Counts : public StackObj {
+ int _call_count; // count non-inlined 'common' calls
+ int _float_count; // count float ops requiring 24-bit precision
+ int _double_count; // count double ops requiring more precision
+ int _java_call_count; // count non-inlined 'java' calls
+ VectorSet _visited; // Visitation flags
+ Node_List _tests; // Set of IfNodes & PCTableNodes
+
+ Final_Reshape_Counts() :
+ _call_count(0), _float_count(0), _double_count(0), _java_call_count(0),
+ _visited( Thread::current()->resource_area() ) { }
+
+ void inc_call_count () { _call_count ++; }
+ void inc_float_count () { _float_count ++; }
+ void inc_double_count() { _double_count++; }
+ void inc_java_call_count() { _java_call_count++; }
+
+ int get_call_count () const { return _call_count ; }
+ int get_float_count () const { return _float_count ; }
+ int get_double_count() const { return _double_count; }
+ int get_java_call_count() const { return _java_call_count; }
+};
+
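+// Sanity check used under VerifyOptoOopOffsets below.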
+static bool oop_offset_is_sane(const TypeInstPtr* tp) {
+ ciInstanceKlass *k = tp->klass()->as_instance_klass();
+ // Make sure the offset goes inside the instance layout.
+ return (uint)tp->offset() < (uint)(oopDesc::header_size() + k->nonstatic_field_size())*wordSize;
+ // Note that OffsetBot and OffsetTop are very negative.
+}
+
+//------------------------------final_graph_reshaping_impl----------------------
+// Implement items 1-5 from final_graph_reshaping below.
+static void final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &fpu ) {
+
+ uint nop = n->Opcode();
+
+ // Check for 2-input instruction with "last use" on right input.
+ // Swap to left input. Implements item (2).
+ if( n->req() == 3 && // two-input instruction
+ n->in(1)->outcnt() > 1 && // left use is NOT a last use
+ (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
+ n->in(2)->outcnt() == 1 &&// right use IS a last use
+ !n->in(2)->is_Con() ) { // right use is not a constant
+ // Check for commutative opcode
+ switch( nop ) {
+ case Op_AddI: case Op_AddF: case Op_AddD: case Op_AddL:
+ case Op_MaxI: case Op_MinI:
+ case Op_MulI: case Op_MulF: case Op_MulD: case Op_MulL:
+ case Op_AndL: case Op_XorL: case Op_OrL:
+ case Op_AndI: case Op_XorI: case Op_OrI: {
+ // Move "last use" input to left by swapping inputs
+ n->swap_edges(1, 2);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+ // Count FPU ops and common calls, implementing item (3)
+ switch( nop ) {
+ // Count all float operations that may use FPU
+ case Op_AddF:
+ case Op_SubF:
+ case Op_MulF:
+ case Op_DivF:
+ case Op_NegF:
+ case Op_ModF:
+ case Op_ConvI2F:
+ case Op_ConF:
+ case Op_CmpF:
+ case Op_CmpF3:
+ // case Op_ConvL2F: // longs are split into 32-bit halves
+ fpu.inc_float_count();
+ break;
+
+ case Op_ConvF2D:
+ case Op_ConvD2F:
+ fpu.inc_float_count();
+ fpu.inc_double_count();
+ break;
+
+ // Count all double operations that may use FPU
+ case Op_AddD:
+ case Op_SubD:
+ case Op_MulD:
+ case Op_DivD:
+ case Op_NegD:
+ case Op_ModD:
+ case Op_ConvI2D:
+ case Op_ConvD2I:
+ // case Op_ConvL2D: // handled by leaf call
+ // case Op_ConvD2L: // handled by leaf call
+ case Op_ConD:
+ case Op_CmpD:
+ case Op_CmpD3:
+ fpu.inc_double_count();
+ break;
+ case Op_Opaque1: // Remove Opaque Nodes before matching
+ case Op_Opaque2: // Remove Opaque Nodes before matching
+ n->replace_by(n->in(1));
+ break;
+ case Op_CallStaticJava:
+ case Op_CallJava:
+ case Op_CallDynamicJava:
+ fpu.inc_java_call_count(); // Count java call site; fall through to the runtime-call cases
+ case Op_CallRuntime:
+ case Op_CallLeaf:
+ case Op_CallLeafNoFP: {
+ assert( n->is_Call(), "" );
+ CallNode *call = n->as_Call();
+ // Count call sites where the FP mode bit would have to be flipped.
+ // Do not count uncommon runtime calls:
+ // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
+ // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
+ if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
+ fpu.inc_call_count(); // Count the call site
+ } else { // See if uncommon argument is shared
+ Node *n = call->in(TypeFunc::Parms);
+ int nop = n->Opcode();
+ // Clone shared simple arguments to uncommon calls, item (1).
+ if( n->outcnt() > 1 &&
+ !n->is_Proj() &&
+ nop != Op_CreateEx &&
+ nop != Op_CheckCastPP &&
+ !n->is_Mem() ) {
+ Node *x = n->clone();
+ call->set_req( TypeFunc::Parms, x );
+ }
+ }
+ break;
+ }
+
+ case Op_StoreD:
+ case Op_LoadD:
+ case Op_LoadD_unaligned:
+ fpu.inc_double_count();
+ goto handle_mem;
+ case Op_StoreF:
+ case Op_LoadF:
+ fpu.inc_float_count();
+ goto handle_mem;
+
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StorePConditional:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreLConditional:
+ case Op_CompareAndSwapI:
+ case Op_CompareAndSwapL:
+ case Op_CompareAndSwapP:
+ case Op_StoreP:
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadL_unaligned:
+ case Op_LoadPLocked:
+ case Op_LoadLLocked:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadS: {
+ handle_mem:
+#ifdef ASSERT
+ if( VerifyOptoOopOffsets ) {
+ assert( n->is_Mem(), "" );
+ MemNode *mem = (MemNode*)n;
+ // Check to see if address types have grounded out somehow.
+ const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
+ assert( !tp || oop_offset_is_sane(tp), "" );
+ }
+#endif
+ break;
+ }
+ case Op_If:
+ case Op_CountedLoopEnd:
+ fpu._tests.push(n); // Collect CFG split points
+ break;
+
+ case Op_AddP: { // Assert sane base pointers
+ const Node *addp = n->in(AddPNode::Address);
+ assert( !addp->is_AddP() ||
+ addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
+ addp->in(AddPNode::Base) == n->in(AddPNode::Base),
+ "Base pointers must match" );
+ break;
+ }
+
+ case Op_ModI:
+ if (UseDivMod) {
+ // Check if a%b and a/b both exist
+ Node* d = n->find_similar(Op_DivI);
+ if (d) {
+ // Replace them with a fused divmod if supported
+ Compile* C = Compile::current();
+ if (Matcher::has_match_rule(Op_DivModI)) {
+ DivModINode* divmod = DivModINode::make(C, n);
+ d->replace_by(divmod->div_proj());
+ n->replace_by(divmod->mod_proj());
+ } else {
+ // replace a%b with a-((a/b)*b)
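+          // Illustrative arithmetic only: with a = 7 and b = 3 the quotient
+          // a/b truncates to 2, so a - (a/b)*b = 7 - 2*3 = 1 == a%b; the
+          // identity a%b == a - (a/b)*b holds for any b != 0 under truncating
+          // integer division.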
+ Node* mult = new (C, 3) MulINode(d, d->in(2));
+ Node* sub = new (C, 3) SubINode(d->in(1), mult);
+ n->replace_by( sub );
+ }
+ }
+ }
+ break;
+
+ case Op_ModL:
+ if (UseDivMod) {
+ // Check if a%b and a/b both exist
+ Node* d = n->find_similar(Op_DivL);
+ if (d) {
+ // Replace them with a fused divmod if supported
+ Compile* C = Compile::current();
+ if (Matcher::has_match_rule(Op_DivModL)) {
+ DivModLNode* divmod = DivModLNode::make(C, n);
+ d->replace_by(divmod->div_proj());
+ n->replace_by(divmod->mod_proj());
+ } else {
+ // replace a%b with a-((a/b)*b)
+ Node* mult = new (C, 3) MulLNode(d, d->in(2));
+ Node* sub = new (C, 3) SubLNode(d->in(1), mult);
+ n->replace_by( sub );
+ }
+ }
+ }
+ break;
+
+ case Op_Load16B:
+ case Op_Load8B:
+ case Op_Load4B:
+ case Op_Load8S:
+ case Op_Load4S:
+ case Op_Load2S:
+ case Op_Load8C:
+ case Op_Load4C:
+ case Op_Load2C:
+ case Op_Load4I:
+ case Op_Load2I:
+ case Op_Load2L:
+ case Op_Load4F:
+ case Op_Load2F:
+ case Op_Load2D:
+ case Op_Store16B:
+ case Op_Store8B:
+ case Op_Store4B:
+ case Op_Store8C:
+ case Op_Store4C:
+ case Op_Store2C:
+ case Op_Store4I:
+ case Op_Store2I:
+ case Op_Store2L:
+ case Op_Store4F:
+ case Op_Store2F:
+ case Op_Store2D:
+ break;
+
+ case Op_PackB:
+ case Op_PackS:
+ case Op_PackC:
+ case Op_PackI:
+ case Op_PackF:
+ case Op_PackL:
+ case Op_PackD:
+ if (n->req()-1 > 2) {
+ // Replace many operand PackNodes with a binary tree for matching
+ PackNode* p = (PackNode*) n;
+ Node* btp = p->binaryTreePack(Compile::current(), 1, n->req());
+ n->replace_by(btp);
+ }
+ break;
+ default:
+ assert( !n->is_Call(), "" );
+ assert( !n->is_Mem(), "" );
+ if( n->is_If() || n->is_PCTable() )
+ fpu._tests.push(n); // Collect CFG split points
+ break;
+ }
+}
+
+//------------------------------final_graph_reshaping_walk---------------------
+// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
+// requires that the walk visits a node's inputs before visiting the node.
+static void final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &fpu ) {
+ fpu._visited.set(root->_idx); // first, mark node as visited
+ uint cnt = root->req();
+ Node *n = root;
+ uint i = 0;
+ while (true) {
+ if (i < cnt) {
+ // Place all non-visited non-null inputs onto stack
+ Node* m = n->in(i);
+ ++i;
+ if (m != NULL && !fpu._visited.test_set(m->_idx)) {
+ cnt = m->req();
+ nstack.push(n, i); // put on stack parent and next input's index
+ n = m;
+ i = 0;
+ }
+ } else {
+ // Now do post-visit work
+ final_graph_reshaping_impl( n, fpu );
+ if (nstack.is_empty())
+ break; // finished
+ n = nstack.node(); // Get node from stack
+ cnt = n->req();
+ i = nstack.index();
+ nstack.pop(); // Shift to the next node on stack
+ }
+ }
+}
+
+//------------------------------final_graph_reshaping--------------------------
+// Final Graph Reshaping.
+//
+// (1) Clone simple inputs to uncommon calls, so they can be scheduled late
+// and not commoned up and forced early. Must come after regular
+// optimizations to avoid GVN undoing the cloning. Clone constant
+// inputs to Loop Phis; these will be split by the allocator anyways.
+// Remove Opaque nodes.
+// (2) Move last-uses by commutative operations to the left input to encourage
+// Intel update-in-place two-address operations and better register usage
+// on RISCs. Must come after regular optimizations to avoid GVN Ideal
+// calls canonicalizing them back.
+// (3) Count the number of double-precision FP ops, single-precision FP ops
+// and call sites. On Intel, we can get correct rounding either by
+// forcing singles to memory (requires extra stores and loads after each
+// FP bytecode) or we can set a rounding mode bit (requires setting and
+// clearing the mode bit around call sites). The mode bit is only used
+// if the relative frequency of single FP ops to calls is low enough.
+// This is a key transform for SPEC mpeg_audio.
+// (4) Detect infinite loops; blobs of code reachable from above but not
+// below. Several of the Code_Gen algorithms fail on such code shapes,
+// so we simply bail out. Happens a lot in ZKM.jar, but also happens
+// from time to time in other codes (such as -Xcomp finalizer loops, etc).
+// Detection is by looking for IfNodes where only 1 projection is
+// reachable from below or CatchNodes missing some targets.
+// (5) Assert for insane oop offsets in debug mode.
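+//
+// Illustrative example for item (2), not a change in behavior: for "z = x + y"
+// where y has no other uses but x does, the AddI's two inputs are swapped so
+// the last-use y becomes in(1); a two-address target can then overwrite y's
+// register in place instead of first having to copy the still-live x.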
+
+bool Compile::final_graph_reshaping() {
+ // an infinite loop may have been eliminated by the optimizer,
+ // in which case the graph will be empty.
+ if (root()->req() == 1) {
+ record_method_not_compilable("trivial infinite loop");
+ return true;
+ }
+
+ Final_Reshape_Counts fpu;
+
+ // Visit everybody reachable!
+ // Allocate stack of size C->unique()/2 to avoid frequent realloc
+ Node_Stack nstack(unique() >> 1);
+ final_graph_reshaping_walk(nstack, root(), fpu);
+
+ // Check for unreachable (from below) code (i.e., infinite loops).
+ for( uint i = 0; i < fpu._tests.size(); i++ ) {
+ Node *n = fpu._tests[i];
+ assert( n->is_PCTable() || n->is_If(), "either PCTables or IfNodes" );
+ // Get number of CFG targets; 2 for IfNodes or _size for PCTables.
+ // Note that PCTables include exception targets after calls.
+ uint expected_kids = n->is_PCTable() ? n->as_PCTable()->_size : 2;
+ if (n->outcnt() != expected_kids) {
+ // Check for a few special cases. Rethrow Nodes never take the
+ // 'fall-thru' path, so expected kids is 1 less.
+ if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
+ if (n->in(0)->in(0)->is_Call()) {
+ CallNode *call = n->in(0)->in(0)->as_Call();
+ if (call->entry_point() == OptoRuntime::rethrow_stub()) {
+ expected_kids--; // Rethrow always has 1 less kid
+ } else if (call->req() > TypeFunc::Parms &&
+ call->is_CallDynamicJava()) {
+ // Check for null receiver. In such case, the optimizer has
+ // detected that the virtual call will always result in a null
+ // pointer exception. The fall-through projection of this CatchNode
+ // will not be populated.
+ Node *arg0 = call->in(TypeFunc::Parms);
+ if (arg0->is_Type() &&
+ arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
+ expected_kids--;
+ }
+ } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
+ call->req() > TypeFunc::Parms+1 &&
+ call->is_CallStaticJava()) {
+ // Check for negative array length. In such case, the optimizer has
+ // detected that the allocation attempt will always result in an
+        // exception. There is no fall-through projection of this CatchNode.
+ Node *arg1 = call->in(TypeFunc::Parms+1);
+ if (arg1->is_Type() &&
+ arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
+ expected_kids--;
+ }
+ }
+ }
+ }
+ // Recheck with a better notion of 'expected_kids'
+ if (n->outcnt() != expected_kids) {
+ record_method_not_compilable("malformed control flow");
+ return true; // Not all targets reachable!
+ }
+ }
+ // Check that I actually visited all kids. Unreached kids
+ // must be infinite loops.
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
+ if (!fpu._visited.test(n->fast_out(j)->_idx)) {
+ record_method_not_compilable("infinite loop");
+        return true; // Found unvisited kid; must be unreachable
+ }
+ }
+
+  // If the original bytecodes contained a mixture of floats and doubles,
+  // check whether the optimizer has made it homogeneous, item (3).
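+  // Illustrative numbers only: with, say, 40 single-precision ops, no double
+  // ops and 3 call sites, 10*3 = 30 < 40, so the 24-bit rounding mode is
+  // selected; with 4 or more call sites it would not be.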
+ if( Use24BitFPMode && Use24BitFP &&
+ fpu.get_float_count() > 32 &&
+ fpu.get_double_count() == 0 &&
+ (10 * fpu.get_call_count() < fpu.get_float_count()) ) {
+ set_24_bit_selection_and_mode( false, true );
+ }
+
+ set_has_java_calls(fpu.get_java_call_count() > 0);
+
+ // No infinite loops, no reason to bail out.
+ return false;
+}
+
+//-----------------------------too_many_traps----------------------------------
+// Report if there are too many traps at the current method and bci.
+// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
+bool Compile::too_many_traps(ciMethod* method,
+ int bci,
+ Deoptimization::DeoptReason reason) {
+ ciMethodData* md = method->method_data();
+ if (md->is_empty()) {
+ // Assume the trap has not occurred, or that it occurred only
+ // because of a transient condition during start-up in the interpreter.
+ return false;
+ }
+ if (md->has_trap_at(bci, reason) != 0) {
+ // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
+ // Also, if there are multiple reasons, or if there is no per-BCI record,
+ // assume the worst.
+ if (log())
+ log()->elem("observe trap='%s' count='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason));
+ return true;
+ } else {
+ // Ignore method/bci and see if there have been too many globally.
+ return too_many_traps(reason, md);
+ }
+}
+
+// Less-accurate variant which does not require a method and bci.
+bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
+ ciMethodData* logmd) {
+ if (trap_count(reason) >= (uint)PerMethodTrapLimit) {
+ // Too many traps globally.
+ // Note that we use cumulative trap_count, not just md->trap_count.
+ if (log()) {
+ int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
+ log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
+ Deoptimization::trap_reason_name(reason),
+ mcount, trap_count(reason));
+ }
+ return true;
+ } else {
+ // The coast is clear.
+ return false;
+ }
+}
+
+//--------------------------too_many_recompiles--------------------------------
+// Report if there are too many recompiles at the current method and bci.
+// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
+// Is not eager to return true, since this will cause the compiler to use
+// Action_none for a trap point, to avoid too many recompilations.
+bool Compile::too_many_recompiles(ciMethod* method,
+ int bci,
+ Deoptimization::DeoptReason reason) {
+ ciMethodData* md = method->method_data();
+ if (md->is_empty()) {
+ // Assume the trap has not occurred, or that it occurred only
+ // because of a transient condition during start-up in the interpreter.
+ return false;
+ }
+ // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
+ uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
+ uint m_cutoff = (uint) PerMethodRecompilationCutoff / 2 + 1; // not zero
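+  // Illustrative arithmetic only: if PerBytecodeRecompilationCutoff were 200
+  // and PerMethodRecompilationCutoff were 400, bc_cutoff would be 25 and
+  // m_cutoff would be 201.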
+ Deoptimization::DeoptReason per_bc_reason
+ = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
+ if ((per_bc_reason == Deoptimization::Reason_none
+ || md->has_trap_at(bci, reason) != 0)
+ // The trap frequency measure we care about is the recompile count:
+ && md->trap_recompiled_at(bci)
+ && md->overflow_recompile_count() >= bc_cutoff) {
+ // Do not emit a trap here if it has already caused recompilations.
+ // Also, if there are multiple reasons, or if there is no per-BCI record,
+ // assume the worst.
+ if (log())
+ log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason),
+ md->overflow_recompile_count());
+ return true;
+ } else if (trap_count(reason) != 0
+ && decompile_count() >= m_cutoff) {
+ // Too many recompiles globally, and we have seen this sort of trap.
+ // Use cumulative decompile_count, not just md->decompile_count.
+ if (log())
+ log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md->trap_count(reason), trap_count(reason),
+ md->decompile_count(), decompile_count());
+ return true;
+ } else {
+ // The coast is clear.
+ return false;
+ }
+}
+
+
+#ifndef PRODUCT
+//------------------------------verify_graph_edges---------------------------
+// Walk the Graph and verify that there is a one-to-one correspondence
+// between Use-Def edges and Def-Use edges in the graph.
+void Compile::verify_graph_edges(bool no_dead_code) {
+ if (VerifyGraphEdges) {
+ ResourceArea *area = Thread::current()->resource_area();
+ Unique_Node_List visited(area);
+ // Call recursive graph walk to check edges
+ _root->verify_edges(visited);
+ if (no_dead_code) {
+ // Now make sure that no visited node is used by an unvisited node.
+      int dead_nodes = 0;  // a count, not a flag: incremented and tested below
+ Unique_Node_List checked(area);
+ while (visited.size() > 0) {
+ Node* n = visited.pop();
+ checked.push(n);
+ for (uint i = 0; i < n->outcnt(); i++) {
+ Node* use = n->raw_out(i);
+ if (checked.member(use)) continue; // already checked
+ if (visited.member(use)) continue; // already in the graph
+ if (use->is_Con()) continue; // a dead ConNode is OK
+ // At this point, we have found a dead node which is DU-reachable.
+ if (dead_nodes++ == 0)
+ tty->print_cr("*** Dead nodes reachable via DU edges:");
+ use->dump(2);
+ tty->print_cr("---");
+ checked.push(use); // No repeats; pretend it is now checked.
+ }
+ }
+ assert(dead_nodes == 0, "using nodes must be reachable from root");
+ }
+ }
+}
+#endif
+
+// The Compile object keeps track of failure reasons separately from the ciEnv.
+// This is required because there is not quite a 1-1 relation between the
+// ciEnv (together with its compilation task) and the Compile object.  Note that one
+// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
+// to backtrack and retry without subsuming loads. Other than this backtracking
+// behavior, the Compile's failure reason is quietly copied up to the ciEnv
+// by the logic in C2Compiler.
+void Compile::record_failure(const char* reason) {
+ if (log() != NULL) {
+ log()->elem("failure reason='%s' phase='compile'", reason);
+ }
+ if (_failure_reason == NULL) {
+ // Record the first failure reason.
+ _failure_reason = reason;
+ }
+ _root = NULL; // flush the graph, too
+}
+
+Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
+ : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false)
+{
+ if (dolog) {
+ C = Compile::current();
+ _log = C->log();
+ } else {
+ C = NULL;
+ _log = NULL;
+ }
+ if (_log != NULL) {
+ _log->begin_head("phase name='%s' nodes='%d'", name, C->unique());
+ _log->stamp();
+ _log->end_head();
+ }
+}
+
+Compile::TracePhase::~TracePhase() {
+ if (_log != NULL) {
+ _log->done("phase nodes='%d'", C->unique());
+ }
+}
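+
+// Illustrative usage only (a sketch, not part of this change): a compiler pass
+// brackets its work in a TracePhase so the elapsed time is accumulated and,
+// when logging is enabled, <phase> elements with node counts are emitted.
+// The timer name below is hypothetical.
+//
+//   {
+//     TracePhase t("example_phase", &_t_example_phase, true /*dolog*/);
+//     // ... do the work of the phase ...
+//   }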
diff --git a/src/share/vm/opto/compile.hpp b/src/share/vm/opto/compile.hpp
new file mode 100644
index 000000000..fcab3a378
--- /dev/null
+++ b/src/share/vm/opto/compile.hpp
@@ -0,0 +1,720 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Block;
+class Bundle;
+class C2Compiler;
+class CallGenerator;
+class ConnectionGraph;
+class InlineTree;
+class Int_Array;
+class Matcher;
+class MachNode;
+class Node;
+class Node_Array;
+class Node_Notes;
+class OptoReg;
+class PhaseCFG;
+class PhaseGVN;
+class PhaseRegAlloc;
+class PhaseCCP;
+class PhaseCCP_DCE;
+class RootNode;
+class relocInfo;
+class Scope;
+class StartNode;
+class SafePointNode;
+class JVMState;
+class TypeData;
+class TypePtr;
+class TypeFunc;
+class Unique_Node_List;
+class nmethod;
+class WarmCallInfo;
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+class MachSafePointNode;
+#endif
+
+//------------------------------Compile----------------------------------------
+// This class defines a top-level Compiler invocation.
+
+class Compile : public Phase {
+ public:
+ // Fixed alias indexes. (See also MergeMemNode.)
+ enum {
+ AliasIdxTop = 1, // pseudo-index, aliases to nothing (used as sentinel value)
+ AliasIdxBot = 2, // pseudo-index, aliases to everything
+ AliasIdxRaw = 3 // hard-wired index for TypeRawPtr::BOTTOM
+ };
+
+ // Variant of TraceTime(NULL, &_t_accumulator, TimeCompiler);
+ // Integrated with logging. If logging is turned on, and dolog is true,
+ // then brackets are put into the log, with time stamps and node counts.
+ // (The time collection itself is always conditionalized on TimeCompiler.)
+ class TracePhase : public TraceTime {
+ private:
+ Compile* C;
+ CompileLog* _log;
+ public:
+ TracePhase(const char* name, elapsedTimer* accumulator, bool dolog);
+ ~TracePhase();
+ };
+
+ // Information per category of alias (memory slice)
+ class AliasType {
+ private:
+ friend class Compile;
+
+ int _index; // unique index, used with MergeMemNode
+ const TypePtr* _adr_type; // normalized address type
+ ciField* _field; // relevant instance field, or null if none
+ bool _is_rewritable; // false if the memory is write-once only
+    int _general_index; // if this type is an instance, the general
+ // type that this is an instance of
+
+ void Init(int i, const TypePtr* at);
+
+ public:
+ int index() const { return _index; }
+ const TypePtr* adr_type() const { return _adr_type; }
+ ciField* field() const { return _field; }
+ bool is_rewritable() const { return _is_rewritable; }
+ bool is_volatile() const { return (_field ? _field->is_volatile() : false); }
+ int general_index() const { return (_general_index != 0) ? _general_index : _index; }
+
+ void set_rewritable(bool z) { _is_rewritable = z; }
+ void set_field(ciField* f) {
+ assert(!_field,"");
+ _field = f;
+ if (f->is_final()) _is_rewritable = false;
+ }
+
+ void print_on(outputStream* st) PRODUCT_RETURN;
+ };
+
+ enum {
+ logAliasCacheSize = 6,
+ AliasCacheSize = (1<<logAliasCacheSize)
+ };
+ struct AliasCacheEntry { const TypePtr* _adr_type; int _index; }; // simple duple type
+ enum {
+ trapHistLength = methodDataOopDesc::_trap_hist_limit
+ };
+
+ private:
+ // Fixed parameters to this compilation.
+ const int _compile_id;
+ const bool _save_argument_registers; // save/restore arg regs for trampolines
+ const bool _subsume_loads; // Load can be matched as part of a larger op.
+ ciMethod* _method; // The method being compiled.
+ int _entry_bci; // entry bci for osr methods.
+ const TypeFunc* _tf; // My kind of signature
+ InlineTree* _ilt; // Ditto (temporary).
+ address _stub_function; // VM entry for stub being compiled, or NULL
+ const char* _stub_name; // Name of stub or adapter being compiled, or NULL
+ address _stub_entry_point; // Compile code entry for generated stub, or NULL
+
+ // Control of this compilation.
+  int _num_loop_opts; // Number of iterations for doing loop optimizations
+ int _max_inline_size; // Max inline size for this compilation
+ int _freq_inline_size; // Max hot method inline size for this compilation
+ int _fixed_slots; // count of frame slots not allocated by the register
+ // allocator i.e. locks, original deopt pc, etc.
+ // For deopt
+ int _orig_pc_slot;
+ int _orig_pc_slot_offset_in_bytes;
+
+ int _major_progress; // Count of something big happening
+ bool _deopt_happens; // TRUE if de-optimization CAN happen
+ bool _has_loops; // True if the method _may_ have some loops
+ bool _has_split_ifs; // True if the method _may_ have some split-if
+ bool _has_unsafe_access; // True if the method _may_ produce faults in unsafe loads or stores.
+ uint _trap_hist[trapHistLength]; // Cumulative traps
+ bool _trap_can_recompile; // Have we emitted a recompiling trap?
+ uint _decompile_count; // Cumulative decompilation counts.
+ bool _do_inlining; // True if we intend to do inlining
+ bool _do_scheduling; // True if we intend to do scheduling
+ bool _do_count_invocations; // True if we generate code to count invocations
+ bool _do_method_data_update; // True if we generate code to update methodDataOops
+ int _AliasLevel; // Locally-adjusted version of AliasLevel flag.
+ bool _print_assembly; // True if we should dump assembly code for this compilation
+#ifndef PRODUCT
+ bool _trace_opto_output;
+#endif
+
+ // Compilation environment.
+ Arena _comp_arena; // Arena with lifetime equivalent to Compile
+ ciEnv* _env; // CI interface
+ CompileLog* _log; // from CompilerThread
+ const char* _failure_reason; // for record_failure/failing pattern
+ GrowableArray<CallGenerator*>* _intrinsics; // List of intrinsics.
+ GrowableArray<Node*>* _macro_nodes; // List of nodes which need to be expanded before matching.
+ ConnectionGraph* _congraph;
+#ifndef PRODUCT
+ IdealGraphPrinter* _printer;
+#endif
+
+ // Node management
+ uint _unique; // Counter for unique Node indices
+ debug_only(static int _debug_idx;) // Monotonic counter (not reset), use -XX:BreakAtNode=<idx>
+ Arena _node_arena; // Arena for new-space Nodes
+ Arena _old_arena; // Arena for old-space Nodes, lifetime during xform
+ RootNode* _root; // Unique root of compilation, or NULL after bail-out.
+ Node* _top; // Unique top node. (Reset by various phases.)
+
+ Node* _immutable_memory; // Initial memory state
+
+ Node* _recent_alloc_obj;
+ Node* _recent_alloc_ctl;
+
+ // Blocked array of debugging and profiling information,
+ // tracked per node.
+ enum { _log2_node_notes_block_size = 8,
+ _node_notes_block_size = (1<<_log2_node_notes_block_size)
+ };
+ GrowableArray<Node_Notes*>* _node_note_array;
+ Node_Notes* _default_node_notes; // default notes for new nodes
+
+ // After parsing and every bulk phase we hang onto the Root instruction.
+ // The RootNode instruction is where the whole program begins. It produces
+ // the initial Control and BOTTOM for everybody else.
+
+ // Type management
+ Arena _Compile_types; // Arena for all types
+ Arena* _type_arena; // Alias for _Compile_types except in Initialize_shared()
+ Dict* _type_dict; // Intern table
+ void* _type_hwm; // Last allocation (see Type::operator new/delete)
+ size_t _type_last_size; // Last allocation size (see Type::operator new/delete)
+ ciMethod* _last_tf_m; // Cache for
+ const TypeFunc* _last_tf; // TypeFunc::make
+ AliasType** _alias_types; // List of alias types seen so far.
+ int _num_alias_types; // Logical length of _alias_types
+ int _max_alias_types; // Physical length of _alias_types
+ AliasCacheEntry _alias_cache[AliasCacheSize]; // Gets aliases w/o data structure walking
+
+ // Parsing, optimization
+ PhaseGVN* _initial_gvn; // Results of parse-time PhaseGVN
+ Unique_Node_List* _for_igvn; // Initial work-list for next round of Iterative GVN
+ WarmCallInfo* _warm_calls; // Sorted work-list for heat-based inlining.
+
+ // Matching, CFG layout, allocation, code generation
+ PhaseCFG* _cfg; // Results of CFG finding
+ bool _select_24_bit_instr; // We selected an instruction with a 24-bit result
+ bool _in_24_bit_fp_mode; // We are emitting instructions with 24-bit results
+ bool _has_java_calls; // True if the method has java calls
+ Matcher* _matcher; // Engine to map ideal to machine instructions
+ PhaseRegAlloc* _regalloc; // Results of register allocation.
+ int _frame_slots; // Size of total frame in stack slots
+ CodeOffsets _code_offsets; // Offsets into the code for various interesting entries
+ RegMask _FIRST_STACK_mask; // All stack slots usable for spills (depends on frame layout)
+ Arena* _indexSet_arena; // control IndexSet allocation within PhaseChaitin
+ void* _indexSet_free_block_list; // free list of IndexSet bit blocks
+
+ uint _node_bundling_limit;
+ Bundle* _node_bundling_base; // Information for instruction bundling
+
+ // Instruction bits passed off to the VM
+ int _method_size; // Size of nmethod code segment in bytes
+ CodeBuffer _code_buffer; // Where the code is assembled
+ int _first_block_size; // Size of unvalidated entry point code / OSR poison code
+ ExceptionHandlerTable _handler_table; // Table of native-code exception handlers
+ ImplicitExceptionTable _inc_table; // Table of implicit null checks in native code
+ OopMapSet* _oop_map_set; // Table of oop maps (one for each safepoint location)
+ static int _CompiledZap_count; // counter compared against CompileZap[First/Last]
+ BufferBlob* _scratch_buffer_blob; // For temporary code buffers.
+ relocInfo* _scratch_locs_memory; // For temporary code buffers.
+
+ public:
+ // Accessors
+
+ // The Compile instance currently active in this (compiler) thread.
+ static Compile* current() {
+ return (Compile*) ciEnv::current()->compiler_data();
+ }
+
+ // ID for this compilation. Useful for setting breakpoints in the debugger.
+ int compile_id() const { return _compile_id; }
+
+ // Does this compilation allow instructions to subsume loads? User
+ // instructions that subsume a load may result in an unschedulable
+ // instruction sequence.
+ bool subsume_loads() const { return _subsume_loads; }
+ bool save_argument_registers() const { return _save_argument_registers; }
+
+
+ // Other fixed compilation parameters.
+ ciMethod* method() const { return _method; }
+ int entry_bci() const { return _entry_bci; }
+ bool is_osr_compilation() const { return _entry_bci != InvocationEntryBci; }
+ bool is_method_compilation() const { return (_method != NULL && !_method->flags().is_native()); }
+ const TypeFunc* tf() const { assert(_tf!=NULL, ""); return _tf; }
+ void init_tf(const TypeFunc* tf) { assert(_tf==NULL, ""); _tf = tf; }
+ InlineTree* ilt() const { return _ilt; }
+ address stub_function() const { return _stub_function; }
+ const char* stub_name() const { return _stub_name; }
+ address stub_entry_point() const { return _stub_entry_point; }
+
+ // Control of this compilation.
+ int fixed_slots() const { assert(_fixed_slots >= 0, ""); return _fixed_slots; }
+ void set_fixed_slots(int n) { _fixed_slots = n; }
+ int major_progress() const { return _major_progress; }
+ void set_major_progress() { _major_progress++; }
+ void clear_major_progress() { _major_progress = 0; }
+ int num_loop_opts() const { return _num_loop_opts; }
+ void set_num_loop_opts(int n) { _num_loop_opts = n; }
+ int max_inline_size() const { return _max_inline_size; }
+ void set_freq_inline_size(int n) { _freq_inline_size = n; }
+ int freq_inline_size() const { return _freq_inline_size; }
+ void set_max_inline_size(int n) { _max_inline_size = n; }
+ bool deopt_happens() const { return _deopt_happens; }
+ bool has_loops() const { return _has_loops; }
+ void set_has_loops(bool z) { _has_loops = z; }
+ bool has_split_ifs() const { return _has_split_ifs; }
+ void set_has_split_ifs(bool z) { _has_split_ifs = z; }
+ bool has_unsafe_access() const { return _has_unsafe_access; }
+ void set_has_unsafe_access(bool z) { _has_unsafe_access = z; }
+ void set_trap_count(uint r, uint c) { assert(r < trapHistLength, "oob"); _trap_hist[r] = c; }
+ uint trap_count(uint r) const { assert(r < trapHistLength, "oob"); return _trap_hist[r]; }
+ bool trap_can_recompile() const { return _trap_can_recompile; }
+ void set_trap_can_recompile(bool z) { _trap_can_recompile = z; }
+ uint decompile_count() const { return _decompile_count; }
+ void set_decompile_count(uint c) { _decompile_count = c; }
+ bool allow_range_check_smearing() const;
+ bool do_inlining() const { return _do_inlining; }
+ void set_do_inlining(bool z) { _do_inlining = z; }
+ bool do_scheduling() const { return _do_scheduling; }
+ void set_do_scheduling(bool z) { _do_scheduling = z; }
+ bool do_count_invocations() const{ return _do_count_invocations; }
+ void set_do_count_invocations(bool z){ _do_count_invocations = z; }
+ bool do_method_data_update() const { return _do_method_data_update; }
+ void set_do_method_data_update(bool z) { _do_method_data_update = z; }
+ int AliasLevel() const { return _AliasLevel; }
+ bool print_assembly() const { return _print_assembly; }
+ void set_print_assembly(bool z) { _print_assembly = z; }
+ // check the CompilerOracle for special behaviours for this compile
+ bool method_has_option(const char * option) {
+ return method() != NULL && method()->has_option(option);
+ }
+#ifndef PRODUCT
+ bool trace_opto_output() const { return _trace_opto_output; }
+#endif
+
+ void begin_method() {
+#ifndef PRODUCT
+ if (_printer) _printer->begin_method(this);
+#endif
+ }
+ void print_method(const char * name, int level = 1) {
+#ifndef PRODUCT
+ if (_printer) _printer->print_method(this, name, level);
+#endif
+ }
+ void end_method() {
+#ifndef PRODUCT
+ if (_printer) _printer->end_method();
+#endif
+ }
+
+ int macro_count() { return _macro_nodes->length(); }
+ Node* macro_node(int idx) { return _macro_nodes->at(idx); }
+ ConnectionGraph* congraph() { return _congraph;}
+ void add_macro_node(Node * n) {
+ //assert(n->is_macro(), "must be a macro node");
+ assert(!_macro_nodes->contains(n), " duplicate entry in expand list");
+ _macro_nodes->append(n);
+ }
+ void remove_macro_node(Node * n) {
+ // this function may be called twice for a node so check
+ // that the node is in the array before attempting to remove it
+ if (_macro_nodes->contains(n))
+ _macro_nodes->remove(n);
+ }
+
+ // Compilation environment.
+ Arena* comp_arena() { return &_comp_arena; }
+ ciEnv* env() const { return _env; }
+ CompileLog* log() const { return _log; }
+ bool failing() const { return _env->failing() || _failure_reason != NULL; }
+ const char* failure_reason() { return _failure_reason; }
+ bool failure_reason_is(const char* r) { return (r==_failure_reason) || (r!=NULL && _failure_reason!=NULL && strcmp(r, _failure_reason)==0); }
+
+ void record_failure(const char* reason);
+ void record_method_not_compilable(const char* reason, bool all_tiers = false) {
+ // All bailouts cover "all_tiers" when TieredCompilation is off.
+ if (!TieredCompilation) all_tiers = true;
+ env()->record_method_not_compilable(reason, all_tiers);
+ // Record failure reason.
+ record_failure(reason);
+ }
+ void record_method_not_compilable_all_tiers(const char* reason) {
+ record_method_not_compilable(reason, true);
+ }
+ bool check_node_count(uint margin, const char* reason) {
+ if (unique() + margin > (uint)MaxNodeLimit) {
+ record_method_not_compilable(reason);
+ return true;
+ } else {
+ return false;
+ }
+ }
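+  // Illustrative arithmetic only: if MaxNodeLimit were 80000 and unique()
+  // were 79990, check_node_count(20, ...) would exceed the limit, record the
+  // failure and return true, while a margin of 5 would not.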
+
+ // Node management
+ uint unique() const { return _unique; }
+ uint next_unique() { return _unique++; }
+ void set_unique(uint i) { _unique = i; }
+ static int debug_idx() { return debug_only(_debug_idx)+0; }
+ static void set_debug_idx(int i) { debug_only(_debug_idx = i); }
+ Arena* node_arena() { return &_node_arena; }
+ Arena* old_arena() { return &_old_arena; }
+ RootNode* root() const { return _root; }
+ void set_root(RootNode* r) { _root = r; }
+ StartNode* start() const; // (Derived from root.)
+ void init_start(StartNode* s);
+ Node* immutable_memory();
+
+ Node* recent_alloc_ctl() const { return _recent_alloc_ctl; }
+ Node* recent_alloc_obj() const { return _recent_alloc_obj; }
+ void set_recent_alloc(Node* ctl, Node* obj) {
+ _recent_alloc_ctl = ctl;
+ _recent_alloc_obj = obj;
+ }
+
+ // Handy undefined Node
+ Node* top() const { return _top; }
+
+  // these are used by callers that need to know about creation and transformation of top:
+ Node* cached_top_node() { return _top; }
+ void set_cached_top_node(Node* tn);
+
+ GrowableArray<Node_Notes*>* node_note_array() const { return _node_note_array; }
+ void set_node_note_array(GrowableArray<Node_Notes*>* arr) { _node_note_array = arr; }
+ Node_Notes* default_node_notes() const { return _default_node_notes; }
+ void set_default_node_notes(Node_Notes* n) { _default_node_notes = n; }
+
+ Node_Notes* node_notes_at(int idx) {
+ return locate_node_notes(_node_note_array, idx, false);
+ }
+ inline bool set_node_notes_at(int idx, Node_Notes* value);
+
+ // Copy notes from source to dest, if they exist.
+ // Overwrite dest only if source provides something.
+ // Return true if information was moved.
+ bool copy_node_notes_to(Node* dest, Node* source);
+
+ // Workhorse function to sort out the blocked Node_Notes array:
+ inline Node_Notes* locate_node_notes(GrowableArray<Node_Notes*>* arr,
+ int idx, bool can_grow = false);
+
+ void grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by);
+
+ // Type management
+ Arena* type_arena() { return _type_arena; }
+ Dict* type_dict() { return _type_dict; }
+ void* type_hwm() { return _type_hwm; }
+ size_t type_last_size() { return _type_last_size; }
+ int num_alias_types() { return _num_alias_types; }
+
+ void init_type_arena() { _type_arena = &_Compile_types; }
+ void set_type_arena(Arena* a) { _type_arena = a; }
+ void set_type_dict(Dict* d) { _type_dict = d; }
+ void set_type_hwm(void* p) { _type_hwm = p; }
+ void set_type_last_size(size_t sz) { _type_last_size = sz; }
+
+ const TypeFunc* last_tf(ciMethod* m) {
+ return (m == _last_tf_m) ? _last_tf : NULL;
+ }
+ void set_last_tf(ciMethod* m, const TypeFunc* tf) {
+ assert(m != NULL || tf == NULL, "");
+ _last_tf_m = m;
+ _last_tf = tf;
+ }
+
+ AliasType* alias_type(int idx) { assert(idx < num_alias_types(), "oob"); return _alias_types[idx]; }
+ AliasType* alias_type(const TypePtr* adr_type) { return find_alias_type(adr_type, false); }
+ bool have_alias_type(const TypePtr* adr_type);
+ AliasType* alias_type(ciField* field);
+
+ int get_alias_index(const TypePtr* at) { return alias_type(at)->index(); }
+ const TypePtr* get_adr_type(uint aidx) { return alias_type(aidx)->adr_type(); }
+ int get_general_index(uint aidx) { return alias_type(aidx)->general_index(); }
+
+ // Building nodes
+ void rethrow_exceptions(JVMState* jvms);
+ void return_values(JVMState* jvms);
+ JVMState* build_start_state(StartNode* start, const TypeFunc* tf);
+
+ // Decide how to build a call.
+ // The profile factor is a discount to apply to this site's interp. profile.
+ CallGenerator* call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float profile_factor);
+
+  // Report if there were too many traps at the given method and bci.
+ // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
+ // If there is no MDO at all, report no trap unless told to assume it.
+ bool too_many_traps(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
+ // This version, unspecific to a particular bci, asks if
+ // PerMethodTrapLimit was exceeded for all inlined methods seen so far.
+ bool too_many_traps(Deoptimization::DeoptReason reason,
+ // Privately used parameter for logging:
+ ciMethodData* logmd = NULL);
+ // Report if there were too many recompiles at a method and bci.
+ bool too_many_recompiles(ciMethod* method, int bci, Deoptimization::DeoptReason reason);
+
+ // Parsing, optimization
+ PhaseGVN* initial_gvn() { return _initial_gvn; }
+ Unique_Node_List* for_igvn() { return _for_igvn; }
+ inline void record_for_igvn(Node* n); // Body is after class Unique_Node_List.
+ void record_for_escape_analysis(Node* n);
+ void set_initial_gvn(PhaseGVN *gvn) { _initial_gvn = gvn; }
+ void set_for_igvn(Unique_Node_List *for_igvn) { _for_igvn = for_igvn; }
+
+ void identify_useful_nodes(Unique_Node_List &useful);
+ void remove_useless_nodes (Unique_Node_List &useful);
+
+ WarmCallInfo* warm_calls() const { return _warm_calls; }
+ void set_warm_calls(WarmCallInfo* l) { _warm_calls = l; }
+ WarmCallInfo* pop_warm_call();
+
+ // Matching, CFG layout, allocation, code generation
+ PhaseCFG* cfg() { return _cfg; }
+ bool select_24_bit_instr() const { return _select_24_bit_instr; }
+ bool in_24_bit_fp_mode() const { return _in_24_bit_fp_mode; }
+ bool has_java_calls() const { return _has_java_calls; }
+ Matcher* matcher() { return _matcher; }
+ PhaseRegAlloc* regalloc() { return _regalloc; }
+ int frame_slots() const { return _frame_slots; }
+ int frame_size_in_words() const; // frame_slots in units of the polymorphic 'words'
+ RegMask& FIRST_STACK_mask() { return _FIRST_STACK_mask; }
+ Arena* indexSet_arena() { return _indexSet_arena; }
+ void* indexSet_free_block_list() { return _indexSet_free_block_list; }
+ uint node_bundling_limit() { return _node_bundling_limit; }
+ Bundle* node_bundling_base() { return _node_bundling_base; }
+ void set_node_bundling_limit(uint n) { _node_bundling_limit = n; }
+ void set_node_bundling_base(Bundle* b) { _node_bundling_base = b; }
+ bool starts_bundle(const Node *n) const;
+ bool need_stack_bang(int frame_size_in_bytes) const;
+ bool need_register_stack_bang() const;
+
+ void set_matcher(Matcher* m) { _matcher = m; }
+//void set_regalloc(PhaseRegAlloc* ra) { _regalloc = ra; }
+ void set_indexSet_arena(Arena* a) { _indexSet_arena = a; }
+ void set_indexSet_free_block_list(void* p) { _indexSet_free_block_list = p; }
+
+ // Remember if this compilation changes hardware mode to 24-bit precision
+ void set_24_bit_selection_and_mode(bool selection, bool mode) {
+ _select_24_bit_instr = selection;
+ _in_24_bit_fp_mode = mode;
+ }
+
+ void set_has_java_calls(bool z) { _has_java_calls = z; }
+
+ // Instruction bits passed off to the VM
+ int code_size() { return _method_size; }
+ CodeBuffer* code_buffer() { return &_code_buffer; }
+ int first_block_size() { return _first_block_size; }
+ void set_frame_complete(int off) { _code_offsets.set_value(CodeOffsets::Frame_Complete, off); }
+ ExceptionHandlerTable* handler_table() { return &_handler_table; }
+ ImplicitExceptionTable* inc_table() { return &_inc_table; }
+ OopMapSet* oop_map_set() { return _oop_map_set; }
+ DebugInformationRecorder* debug_info() { return env()->debug_info(); }
+ Dependencies* dependencies() { return env()->dependencies(); }
+ static int CompiledZap_count() { return _CompiledZap_count; }
+ BufferBlob* scratch_buffer_blob() { return _scratch_buffer_blob; }
+ void init_scratch_buffer_blob();
+ void set_scratch_buffer_blob(BufferBlob* b) { _scratch_buffer_blob = b; }
+ relocInfo* scratch_locs_memory() { return _scratch_locs_memory; }
+ void set_scratch_locs_memory(relocInfo* b) { _scratch_locs_memory = b; }
+
+ // emit to scratch blob, report resulting size
+ uint scratch_emit_size(const Node* n);
+
+ enum ScratchBufferBlob {
+ MAX_inst_size = 1024,
+ MAX_locs_size = 128, // number of relocInfo elements
+ MAX_const_size = 128,
+ MAX_stubs_size = 128
+ };
+
+ // Major entry point. Given a Scope, compile the associated method.
+ // For normal compilations, entry_bci is InvocationEntryBci. For on stack
+ // replacement, entry_bci indicates the bytecode for which to compile a
+ // continuation.
+ Compile(ciEnv* ci_env, C2Compiler* compiler, ciMethod* target,
+ int entry_bci, bool subsume_loads);
+
+ // Second major entry point. From the TypeFunc signature, generate code
+ // to pass arguments from the Java calling convention to the C calling
+ // convention.
+ Compile(ciEnv* ci_env, const TypeFunc *(*gen)(),
+ address stub_function, const char *stub_name,
+ int is_fancy_jump, bool pass_tls,
+ bool save_arg_registers, bool return_pc);
+
+ // From the TypeFunc signature, generate code to pass arguments
+ // from Compiled calling convention to Interpreter's calling convention
+ void Generate_Compiled_To_Interpreter_Graph(const TypeFunc *tf, address interpreter_entry);
+
+ // From the TypeFunc signature, generate code to pass arguments
+ // from Interpreter's calling convention to Compiler's calling convention
+ void Generate_Interpreter_To_Compiled_Graph(const TypeFunc *tf);
+
+ // Are we compiling a method?
+ bool has_method() { return method() != NULL; }
+
+ // Maybe print some information about this compile.
+ void print_compile_messages();
+
+ // Final graph reshaping, a post-pass after the regular optimizer is done.
+ bool final_graph_reshaping();
+
+ // returns true if adr is completely contained in the given alias category
+ bool must_alias(const TypePtr* adr, int alias_idx);
+
+ // returns true if adr overlaps with the given alias category
+ bool can_alias(const TypePtr* adr, int alias_idx);
+
+ // Driver for converting compiler's IR into machine code bits
+ void Output();
+
+ // Accessors for node bundling info.
+ Bundle* node_bundling(const Node *n);
+ bool valid_bundle_info(const Node *n);
+
+ // Schedule and Bundle the instructions
+ void ScheduleAndBundle();
+
+ // Build OopMaps for each GC point
+ void BuildOopMaps();
+ // Append debug info for the node to the array
+ void FillLocArray( int idx, Node *local, GrowableArray<ScopeValue*> *array );
+
+ // Process an OopMap Element while emitting nodes
+ void Process_OopMap_Node(MachNode *mach, int code_offset);
+
+ // Write out basic block data to code buffer
+ void Fill_buffer();
+
+ // Determine which variable sized branches can be shortened
+ void Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size);
+
+ // Compute the size of first NumberOfLoopInstrToAlign instructions
+ // at the head of a loop.
+ void compute_loop_first_inst_sizes();
+
+ // Compute the information for the exception tables
+ void FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels);
+
+ // Stack slots that may be unused by the calling convention but must
+ // otherwise be preserved. On Intel this includes the return address.
+ // On PowerPC it includes the 4 words holding the old TOC & LR glue.
+ uint in_preserve_stack_slots();
+
+ // "Top of Stack" slots that may be unused by the calling convention but must
+ // otherwise be preserved.
+ // On Intel these are not necessary and the value can be zero.
+ // On Sparc this describes the words reserved for storing a register window
+ // when an interrupt occurs.
+ static uint out_preserve_stack_slots();
+
+ // Number of outgoing stack slots killed above the out_preserve_stack_slots
+ // for calls to C. Supports the var-args backing area for register parms.
+ uint varargs_C_out_slots_killed() const;
+
+ // Number of Stack Slots consumed by a synchronization entry
+ int sync_stack_slots() const;
+
+ // Compute the name of old_SP. See <arch>.ad for frame layout.
+ OptoReg::Name compute_old_SP();
+
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+ static bool is_node_getting_a_safepoint(Node*);
+ void Insert_zap_nodes();
+ Node* call_zap_node(MachSafePointNode* n, int block_no);
+#endif
+
+ private:
+ // Phase control:
+ void Init(int aliaslevel); // Prepare for a single compilation
+ int Inline_Warm(); // Find more inlining work.
+ void Finish_Warm(); // Give up on further inlines.
+ void Optimize(); // Given a graph, optimize it
+ void Code_Gen(); // Generate code from a graph
+
+ // Management of the AliasType table.
+ void grow_alias_types();
+ AliasCacheEntry* probe_alias_cache(const TypePtr* adr_type);
+ const TypePtr *flatten_alias_type(const TypePtr* adr_type) const;
+ AliasType* find_alias_type(const TypePtr* adr_type, bool no_create);
+
+ void verify_top(Node*) const PRODUCT_RETURN;
+
+ // Intrinsic setup.
+ void register_library_intrinsics(); // initializer
+ CallGenerator* make_vm_intrinsic(ciMethod* m, bool is_virtual); // constructor
+ int intrinsic_insertion_index(ciMethod* m, bool is_virtual); // helper
+ CallGenerator* find_intrinsic(ciMethod* m, bool is_virtual); // query fn
+ void register_intrinsic(CallGenerator* cg); // update fn
+
+#ifndef PRODUCT
+ static juint _intrinsic_hist_count[vmIntrinsics::ID_LIMIT];
+ static jubyte _intrinsic_hist_flags[vmIntrinsics::ID_LIMIT];
+#endif
+
+ public:
+
+ // Note: Histogram array size is about 1 Kb.
+ enum { // flag bits:
+ _intrinsic_worked = 1, // succeeded at least once
+ _intrinsic_failed = 2, // tried it but it failed
+ _intrinsic_disabled = 4, // was requested but disabled (e.g., -XX:-InlineUnsafeOps)
+ _intrinsic_virtual = 8, // was seen in the virtual form (rare)
+ _intrinsic_both = 16 // was seen in the non-virtual form (usual)
+ };
+ // Update histogram. Return boolean if this is a first-time occurrence.
+ static bool gather_intrinsic_statistics(vmIntrinsics::ID id,
+ bool is_virtual, int flags) PRODUCT_RETURN0;
+ static void print_intrinsic_statistics() PRODUCT_RETURN;
+
+ // Graph verification code
+ // Walk the node list, verifying that there is a one-to-one
+ // correspondence between Use-Def edges and Def-Use edges
+ // The option no_dead_code enables stronger checks that the
+ // graph is strongly connected from root in both directions.
+ void verify_graph_edges(bool no_dead_code = false) PRODUCT_RETURN;
+
+ // Print bytecodes, including the scope inlining tree
+ void print_codes();
+
+ // End-of-run dumps.
+ static void print_statistics() PRODUCT_RETURN;
+
+ // Dump formatted assembly
+ void dump_asm(int *pcs = NULL, uint pc_limit = 0) PRODUCT_RETURN;
+ void dump_pc(int *pcs, int pc_limit, Node *n);
+
+ // Verify ADLC assumptions during startup
+ static void adlc_verification() PRODUCT_RETURN;
+
+ // Definitions of pd methods
+ static void pd_compiler2_init();
+};
diff --git a/src/share/vm/opto/connode.cpp b/src/share/vm/opto/connode.cpp
new file mode 100644
index 000000000..6896f3471
--- /dev/null
+++ b/src/share/vm/opto/connode.cpp
@@ -0,0 +1,1227 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_connode.cpp.incl"
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+uint ConNode::hash() const {
+ return (uintptr_t)in(TypeFunc::Control) + _type->hash();
+}
+
+//------------------------------make-------------------------------------------
+ConNode *ConNode::make( Compile* C, const Type *t ) {
+ switch( t->basic_type() ) {
+ case T_INT: return new (C, 1) ConINode( t->is_int() );
+ case T_ARRAY: return new (C, 1) ConPNode( t->is_aryptr() );
+ case T_LONG: return new (C, 1) ConLNode( t->is_long() );
+ case T_FLOAT: return new (C, 1) ConFNode( t->is_float_constant() );
+ case T_DOUBLE: return new (C, 1) ConDNode( t->is_double_constant() );
+ case T_VOID: return new (C, 1) ConNode ( Type::TOP );
+ case T_OBJECT: return new (C, 1) ConPNode( t->is_oopptr() );
+ case T_ADDRESS: return new (C, 1) ConPNode( t->is_ptr() );
+ // Expected cases: TypePtr::NULL_PTR, any is_rawptr()
+ // Also seen: AnyPtr(TopPTR *+top); from command line:
+ // r -XX:+PrintOpto -XX:CIStart=285 -XX:+CompileTheWorld -XX:CompileTheWorldStartAt=660
+ // %%%% Stop using TypePtr::NULL_PTR to represent nulls: use either TypeRawPtr::NULL_PTR
+ // or else TypeOopPtr::NULL_PTR. Then set Type::_basic_type[AnyPtr] = T_ILLEGAL
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//=============================================================================
+/*
+The major change is for CMoveP and StrComp. They have related but slightly
+different problems. They both take in TWO oops which are both null-checked
+independently before the using Node. After CCP removes the CastPP's they need
+to pick up the guarding test edge - in this case TWO control edges. I tried
+various solutions, all have problems:
+
+(1) Do nothing. This leads to a bug where we hoist a Load from a CMoveP or a
+StrComp above a guarding null check. I've seen both cases in normal -Xcomp
+testing.
+
+(2) Plug the control edge from 1 of the 2 oops in. Apparent problem here is
+to figure out which test post-dominates. The real problem is that it doesn't
+matter which one you pick. After you pick one, the dominating-test elider in
+IGVN can remove the test and allow you to hoist up to the dominating test on
+the chosen oop, bypassing the test on the not-chosen oop. Seen in testing.
+Oops.
+
+(3) Leave the CastPP's in. This makes the graph more accurate in some sense;
+we get to keep around the knowledge that an oop is not-null after some test.
+Alas, the CastPP's interfere with GVN (some values are the regular oop, some
+are the CastPP of the oop, all merge at Phi's which cannot collapse, etc).
+This cost us 10% on SpecJVM, even when I removed some of the more trivial
+cases in the optimizer. Removing more useless Phi's started allowing Loads to
+illegally float above null checks. I gave up on this approach.
+
+(4) Add BOTH control edges to both tests. Alas, too much code knows that
+control edges are in slot-zero ONLY. Many quick asserts fail; no way to do
+this one. Note that I really want to allow the CMoveP to float and add both
+control edges to the dependent Load op - meaning I can select early but I
+cannot Load until I pass both tests.
+
+(5) Do not hoist CMoveP and StrComp. To this end I added the v-call
+depends_only_on_test(). No obvious performance loss on Spec, but we are
+clearly conservative on CMoveP (also so on StrComp but that's unlikely to
+matter ever).
+
+*/
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Move constants to the right.
+Node *CMoveNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(0) && remove_dead_region(phase, can_reshape) ) return this;
+ assert( !phase->eqv(in(Condition), this) &&
+ !phase->eqv(in(IfFalse), this) &&
+ !phase->eqv(in(IfTrue), this), "dead loop in CMoveNode::Ideal" );
+ if( phase->type(in(Condition)) == Type::TOP )
+ return NULL; // return NULL when Condition is dead
+
+ if( in(IfFalse)->is_Con() && !in(IfTrue)->is_Con() ) {
+ if( in(Condition)->is_Bool() ) {
+ BoolNode* b = in(Condition)->as_Bool();
+ BoolNode* b2 = b->negate(phase);
+ return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+ }
+ }
+ return NULL;
+}
+
+//------------------------------is_cmove_id------------------------------------
+// Helper function to check for CMOVE identity. Shared with PhiNode::Identity
+Node *CMoveNode::is_cmove_id( PhaseTransform *phase, Node *cmp, Node *t, Node *f, BoolNode *b ) {
+ // Check for Cmp'ing and CMove'ing same values
+ if( (phase->eqv(cmp->in(1),f) &&
+ phase->eqv(cmp->in(2),t)) ||
+ // Swapped Cmp is OK
+ (phase->eqv(cmp->in(2),f) &&
+ phase->eqv(cmp->in(1),t)) ) {
+ // Check for "(t==f)?t:f;" and replace with "f"
+ if( b->_test._test == BoolTest::eq )
+ return f;
+ // Allow the inverted case as well
+ // Check for "(t!=f)?t:f;" and replace with "t"
+ if( b->_test._test == BoolTest::ne )
+ return t;
+ }
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Conditional-move is an identity if both inputs are the same, or the test
+// true or false.
+Node *CMoveNode::Identity( PhaseTransform *phase ) {
+ if( phase->eqv(in(IfFalse),in(IfTrue)) ) // C-moving identical inputs?
+ return in(IfFalse); // Then it doesn't matter
+ if( phase->type(in(Condition)) == TypeInt::ZERO )
+ return in(IfFalse); // Always pick left(false) input
+ if( phase->type(in(Condition)) == TypeInt::ONE )
+ return in(IfTrue); // Always pick right(true) input
+
+ // Check for CMove'ing a constant after comparing against the constant.
+ // Happens all the time now, since if we compare equality vs a constant in
+ // the parser, we "know" the variable is constant on one path and we force
+ // it. Thus code like "if( x==0 ) {/*EMPTY*/}" ends up inserting a
+ // conditional move: "x = (x==0)?0:x;". Yucko. This fix is slightly more
+ // general in that we don't need constants.
+ if( in(Condition)->is_Bool() ) {
+ BoolNode *b = in(Condition)->as_Bool();
+ Node *cmp = b->in(1);
+ if( cmp->is_Cmp() ) {
+ Node *id = is_cmove_id( phase, cmp, in(IfTrue), in(IfFalse), b );
+ if( id ) return id;
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+// Result is the meet of inputs
+const Type *CMoveNode::Value( PhaseTransform *phase ) const {
+ if( phase->type(in(Condition)) == Type::TOP )
+ return Type::TOP;
+ return phase->type(in(IfFalse))->meet(phase->type(in(IfTrue)));
+}
+
+//------------------------------make-------------------------------------------
+// Make a correctly-flavored CMove. Since _type is directly determined
+// from the inputs we do not need to specify it here.
+CMoveNode *CMoveNode::make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t ) {
+ switch( t->basic_type() ) {
+ case T_INT: return new (C, 4) CMoveINode( bol, left, right, t->is_int() );
+ case T_FLOAT: return new (C, 4) CMoveFNode( bol, left, right, t );
+ case T_DOUBLE: return new (C, 4) CMoveDNode( bol, left, right, t );
+ case T_LONG: return new (C, 4) CMoveLNode( bol, left, right, t->is_long() );
+ case T_OBJECT: return new (C, 4) CMovePNode( c, bol, left, right, t->is_oopptr() );
+ case T_ADDRESS: return new (C, 4) CMovePNode( c, bol, left, right, t->is_ptr() );
+ default:
+ ShouldNotReachHere();
+ return NULL;
+ }
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for conversions to boolean
+Node *CMoveINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ // If zero is on the left (false-case, no-move-case) it must mean another
+ // constant is on the right (otherwise the shared CMove::Ideal code would
+ // have moved the constant to the right). This situation is bad for Intel
+ // and a don't-care for Sparc. It's bad for Intel because the zero has to
+ // be manifested in a register with a XOR which kills flags, which are live
+ // on input to the CMoveI, leading to a situation which causes excessive
+  // spilling on Intel. For Sparc, if the zero is on the left the Sparc will
+ // zero a register via G0 and conditionally-move the other constant. If the
+ // zero is on the right, the Sparc will load the first constant with a
+ // 13-bit set-lo and conditionally move G0. See bug 4677505.
+ if( phase->type(in(IfFalse)) == TypeInt::ZERO && !(phase->type(in(IfTrue)) == TypeInt::ZERO) ) {
+ if( in(Condition)->is_Bool() ) {
+ BoolNode* b = in(Condition)->as_Bool();
+ BoolNode* b2 = b->negate(phase);
+ return make( phase->C, in(Control), phase->transform(b2), in(IfTrue), in(IfFalse), _type );
+ }
+ }
+
+ // Now check for booleans
+ int flip = 0;
+
+ // Check for picking from zero/one
+ if( phase->type(in(IfFalse)) == TypeInt::ZERO && phase->type(in(IfTrue)) == TypeInt::ONE ) {
+ flip = 1 - flip;
+ } else if( phase->type(in(IfFalse)) == TypeInt::ONE && phase->type(in(IfTrue)) == TypeInt::ZERO ) {
+ } else return NULL;
+
+ // Check for eq/ne test
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ if( bol->_test._test == BoolTest::eq ) {
+ } else if( bol->_test._test == BoolTest::ne ) {
+ flip = 1-flip;
+ } else return NULL;
+
+ // Check for vs 0 or 1
+ if( !bol->in(1)->is_Cmp() ) return NULL;
+ const CmpNode *cmp = bol->in(1)->as_Cmp();
+ if( phase->type(cmp->in(2)) == TypeInt::ZERO ) {
+ } else if( phase->type(cmp->in(2)) == TypeInt::ONE ) {
+ // Allow cmp-vs-1 if the other input is bounded by 0-1
+ if( phase->type(cmp->in(1)) != TypeInt::BOOL )
+ return NULL;
+ flip = 1 - flip;
+ } else return NULL;
+
+ // Convert to a bool (flipped)
+ // Build int->bool conversion
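+ // e.g.  CMoveI( Bool(CmpI(x,0),ne), IfFalse: 0, IfTrue: 1 )  ==>  Conv2B(x)
+ //       CMoveI( Bool(CmpI(x,0),eq), IfFalse: 0, IfTrue: 1 )  ==>  XorI( Conv2B(x), 1 )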
+#ifndef PRODUCT
+ if( PrintOpto ) tty->print_cr("CMOV to I2B");
+#endif
+ Node *n = new (phase->C, 2) Conv2BNode( cmp->in(1) );
+ if( flip )
+ n = new (phase->C, 3) XorINode( phase->transform(n), phase->intcon(1) );
+
+ return n;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for absolute value
+Node *CMoveFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // Find the Bool
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = IfTrue; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = IfFalse; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = IfTrue; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = IfFalse; break;
+ default: return NULL; break;
+ }
+
+ // Find zero input of CmpF; the other input is being abs'd
+ Node *cmpf = bol->in(1);
+ if( cmpf->Opcode() != Op_CmpF ) return NULL;
+ Node *X = NULL;
+ bool flip = false;
+ if( phase->type(cmpf->in(cmp_zero_idx)) == TypeF::ZERO ) {
+ X = cmpf->in(3 - cmp_zero_idx);
+ } else if (phase->type(cmpf->in(3 - cmp_zero_idx)) == TypeF::ZERO) {
+ // The test is inverted, we should invert the result...
+ X = cmpf->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // If X is found on the appropriate phi input, find the subtract on the other
+ if( X != in(phi_x_idx) ) return NULL;
+ int phi_sub_idx = phi_x_idx == IfTrue ? IfFalse : IfTrue;
+ Node *sub = in(phi_sub_idx);
+
+ // Allow only SubF(0,X) and fail out for all others; NegF is not OK
+ if( sub->Opcode() != Op_SubF ||
+ sub->in(2) != X ||
+ phase->type(sub->in(1)) != TypeF::ZERO ) return NULL;
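+ // e.g. for "x > 0.0f ? x : 0.0f - x" the graph shape is
+ //   CMoveF( Bool(CmpF(x,0),gt), IfFalse: SubF(0,x), IfTrue: x )
+ // which collapses to AbsF(x); when the zero sits on the other compare
+ // input the sense is inverted and the result is negated below (flip).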
+
+ Node *abs = new (phase->C, 2) AbsFNode( X );
+ if( flip )
+ abs = new (phase->C, 3) SubFNode(sub->in(1), phase->transform(abs));
+
+ return abs;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Check for absolute value
+Node *CMoveDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Try generic ideal's first
+ Node *x = CMoveNode::Ideal(phase, can_reshape);
+ if( x ) return x;
+
+ int cmp_zero_idx = 0; // Index of compare input where to look for zero
+ int phi_x_idx = 0; // Index of phi input where to find naked x
+
+ // Find the Bool
+ if( !in(1)->is_Bool() ) return NULL;
+ BoolNode *bol = in(1)->as_Bool();
+ // Check bool sense
+ switch( bol->_test._test ) {
+ case BoolTest::lt: cmp_zero_idx = 1; phi_x_idx = IfTrue; break;
+ case BoolTest::le: cmp_zero_idx = 2; phi_x_idx = IfFalse; break;
+ case BoolTest::gt: cmp_zero_idx = 2; phi_x_idx = IfTrue; break;
+ case BoolTest::ge: cmp_zero_idx = 1; phi_x_idx = IfFalse; break;
+ default: return NULL; break;
+ }
+
+ // Find zero input of CmpD; the other input is being abs'd
+ Node *cmpd = bol->in(1);
+ if( cmpd->Opcode() != Op_CmpD ) return NULL;
+ Node *X = NULL;
+ bool flip = false;
+ if( phase->type(cmpd->in(cmp_zero_idx)) == TypeD::ZERO ) {
+ X = cmpd->in(3 - cmp_zero_idx);
+ } else if (phase->type(cmpd->in(3 - cmp_zero_idx)) == TypeD::ZERO) {
+ // The test is inverted, we should invert the result...
+ X = cmpd->in(cmp_zero_idx);
+ flip = true;
+ } else {
+ return NULL;
+ }
+
+ // If X is found on the appropriate phi input, find the subtract on the other
+ if( X != in(phi_x_idx) ) return NULL;
+ int phi_sub_idx = phi_x_idx == IfTrue ? IfFalse : IfTrue;
+ Node *sub = in(phi_sub_idx);
+
+ // Allow only SubD(0,X) and fail out for all others; NegD is not OK
+ if( sub->Opcode() != Op_SubD ||
+ sub->in(2) != X ||
+ phase->type(sub->in(1)) != TypeD::ZERO ) return NULL;
+
+ Node *abs = new (phase->C, 2) AbsDNode( X );
+ if( flip )
+ abs = new (phase->C, 3) SubDNode(sub->in(1), phase->transform(abs));
+
+ return abs;
+}
+
+
+//=============================================================================
+// If input is already higher or equal to cast type, then this is an identity.
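+// For example, a CastII to int:[0..10] whose input is already typed
+// int:[3..5] adds no information and can be replaced by its input.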
+Node *ConstraintCastNode::Identity( PhaseTransform *phase ) {
+ return phase->type(in(1))->higher_equal(_type) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// Take 'join' of input and cast-up type
+const Type *ConstraintCastNode::Value( PhaseTransform *phase ) const {
+ if( in(0) && phase->type(in(0)) == Type::TOP ) return Type::TOP;
+ const Type* ft = phase->type(in(1))->filter(_type);
+
+#ifdef ASSERT
+ // Previous versions of this function had some special case logic,
+ // which is no longer necessary. Make sure of the required effects.
+ switch (Opcode()) {
+ case Op_CastII:
+ {
+ const Type* t1 = phase->type(in(1));
+ if( t1 == Type::TOP ) assert(ft == Type::TOP, "special case #1");
+ const Type* rt = t1->join(_type);
+ if (rt->empty()) assert(ft == Type::TOP, "special case #2");
+ break;
+ }
+ case Op_CastPP:
+ if (phase->type(in(1)) == TypePtr::NULL_PTR &&
+ _type->isa_ptr() && _type->is_ptr()->_ptr == TypePtr::NotNull)
+ assert(ft == Type::TOP, "special case #3");
+ break;
+ }
+#endif //ASSERT
+
+ return ft;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *ConstraintCastNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Throw away cast after constant propagation
+Node *ConstraintCastNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ const Type *t = ccp->type(in(1));
+ ccp->hash_delete(this);
+ set_type(t); // Turn into ID function
+ ccp->hash_insert(this);
+ return this;
+}
+
+
+//=============================================================================
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// If not converting int->oop, throw away cast after constant propagation
+Node *CastPPNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ const Type *t = ccp->type(in(1));
+ if (!t->isa_oop_ptr()) {
+ return NULL; // do not transform raw pointers
+ }
+ return ConstraintCastNode::Ideal_DU_postCCP(ccp);
+}
+
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If input is already higher or equal to cast type, then this is an identity.
+Node *CheckCastPPNode::Identity( PhaseTransform *phase ) {
+ // Toned down to rescue meeting at a Phi 3 different oops all implementing
+ // the same interface. CompileTheWorld starting at 502, kd12rc1.zip.
+ return (phase->type(in(1)) == phase->type(this)) ? in(1) : this;
+}
+
+// Determine whether "n" is a node which can cause an alias of one of its inputs. Node types
+// which can create aliases are: CheckCastPP, Phi, and any store (if there is also a load from
+// the location).
+// Note: this checks for aliases created in this compilation, not ones which
+// may be created at call sites.
+static bool can_cause_alias(Node *n, PhaseTransform *phase) {
+ bool possible_alias = false;
+
+ if (n->is_Store()) {
+ possible_alias = !n->as_Store()->value_never_loaded(phase);
+ } else {
+ int opc = n->Opcode();
+ possible_alias = n->is_Phi() ||
+ opc == Op_CheckCastPP ||
+ opc == Op_StorePConditional ||
+ opc == Op_CompareAndSwapP;
+ }
+ return possible_alias;
+}
+
+//------------------------------Value------------------------------------------
+// Take 'join' of input and cast-up type, unless working with an Interface
+const Type *CheckCastPPNode::Value( PhaseTransform *phase ) const {
+ if( in(0) && phase->type(in(0)) == Type::TOP ) return Type::TOP;
+
+ const Type *inn = phase->type(in(1));
+ if( inn == Type::TOP ) return Type::TOP; // No information yet
+
+ const TypePtr *in_type = inn->isa_ptr();
+ const TypePtr *my_type = _type->isa_ptr();
+ const Type *result = _type;
+ if( in_type != NULL && my_type != NULL ) {
+ TypePtr::PTR in_ptr = in_type->ptr();
+ if( in_ptr == TypePtr::Null ) {
+ result = in_type;
+ } else if( in_ptr == TypePtr::Constant ) {
+ // Casting a constant oop to an interface?
+ // (i.e., a String to a Comparable?)
+ // Then return the interface.
+ const TypeOopPtr *jptr = my_type->isa_oopptr();
+ assert( jptr, "" );
+ result = (jptr->klass()->is_interface() || !in_type->higher_equal(_type))
+ ? my_type->cast_to_ptr_type( TypePtr::NotNull )
+ : in_type;
+ } else {
+ result = my_type->cast_to_ptr_type( my_type->join_ptr(in_ptr) );
+ }
+ }
+ return result;
+
+ // JOIN NOT DONE HERE BECAUSE OF INTERFACE ISSUES.
+ // FIX THIS (DO THE JOIN) WHEN UNION TYPES APPEAR!
+
+ //
+ // Remove this code after overnight run indicates no performance
+ // loss from not performing JOIN at CheckCastPPNode
+ //
+ // const TypeInstPtr *in_oop = in->isa_instptr();
+ // const TypeInstPtr *my_oop = _type->isa_instptr();
+ // // If either input is an 'interface', return destination type
+ // assert (in_oop == NULL || in_oop->klass() != NULL, "");
+ // assert (my_oop == NULL || my_oop->klass() != NULL, "");
+ // if( (in_oop && in_oop->klass()->klass_part()->is_interface())
+ // ||(my_oop && my_oop->klass()->klass_part()->is_interface()) ) {
+ // TypePtr::PTR in_ptr = in->isa_ptr() ? in->is_ptr()->_ptr : TypePtr::BotPTR;
+ // // Preserve cast away nullness for interfaces
+ // if( in_ptr == TypePtr::NotNull && my_oop && my_oop->_ptr == TypePtr::BotPTR ) {
+ // return my_oop->cast_to_ptr_type(TypePtr::NotNull);
+ // }
+ // return _type;
+ // }
+ //
+ // // Neither the input nor the destination type is an interface,
+ //
+ // // history: JOIN used to cause weird corner case bugs
+ // // return (in == TypeOopPtr::NULL_PTR) ? in : _type;
+ // // JOIN picks up NotNull in common instance-of/check-cast idioms, both oops.
+ // // JOIN does not preserve NotNull in other cases, e.g. RawPtr vs InstPtr
+ // const Type *join = in->join(_type);
+ // // Check if join preserved NotNull'ness for pointers
+ // if( join->isa_ptr() && _type->isa_ptr() ) {
+ // TypePtr::PTR join_ptr = join->is_ptr()->_ptr;
+ // TypePtr::PTR type_ptr = _type->is_ptr()->_ptr;
+ // // If there isn't any NotNull'ness to preserve
+ // // OR if join preserved NotNull'ness then return it
+ // if( type_ptr == TypePtr::BotPTR || type_ptr == TypePtr::Null ||
+ // join_ptr == TypePtr::NotNull || join_ptr == TypePtr::Constant ) {
+ // return join;
+ // }
+ // // ELSE return same old type as before
+ // return _type;
+ // }
+ // // Not joining two pointers
+ // return join;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *CheckCastPPNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *Conv2BNode::Identity( PhaseTransform *phase ) {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return in(1);
+ if( t == TypeInt::ZERO ) return in(1);
+ if( t == TypeInt::ONE ) return in(1);
+ if( t == TypeInt::BOOL ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *Conv2BNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == TypeInt::ZERO ) return TypeInt::ZERO;
+ if( t == TypePtr::NULL_PTR ) return TypeInt::ZERO;
+ const TypePtr *tp = t->isa_ptr();
+ if( tp != NULL ) {
+ if( tp->ptr() == TypePtr::AnyNull ) return Type::TOP;
+ if( tp->ptr() == TypePtr::Constant) return TypeInt::ONE;
+ if (tp->ptr() == TypePtr::NotNull) return TypeInt::ONE;
+ return TypeInt::BOOL;
+ }
+ if (t->base() != Type::Int) return TypeInt::BOOL;
+ const TypeInt *ti = t->is_int();
+ if( ti->_hi < 0 || ti->_lo > 0 ) return TypeInt::ONE;
+ return TypeInt::BOOL;
+}
+
+
+// The conversion operations are all Alpha sorted. Please keep it that way!
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return Type::FLOAT;
+ const TypeD *td = t->is_double_constant();
+ return TypeF::make( (float)td->getd() );
+}
+
+//------------------------------Identity---------------------------------------
+// Floats can be converted to doubles with no loss of bits. Hence
+// converting a float to a double and back to a float is a NOP.
+Node *ConvD2FNode::Identity(PhaseTransform *phase) {
+ return (in(1)->Opcode() == Op_ConvF2D) ? in(1)->in(1) : this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeInt::INT;
+ const TypeD *td = t->is_double_constant();
+ return TypeInt::make( SharedRuntime::d2i( td->getd() ) );
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvD2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundDouble )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+// Ints can be converted to doubles with no loss of bits. Hence
+// converting an integer to a double and back to an integer is a NOP.
+Node *ConvD2INode::Identity(PhaseTransform *phase) {
+ return (in(1)->Opcode() == Op_ConvI2D) ? in(1)->in(1) : this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvD2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeLong::LONG;
+ const TypeD *td = t->is_double_constant();
+ return TypeLong::make( SharedRuntime::d2l( td->getd() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvD2LNode::Identity(PhaseTransform *phase) {
+ // Remove ConvD2L->ConvL2D->ConvD2L sequences.
+ if( in(1) ->Opcode() == Op_ConvL2D &&
+ in(1)->in(1)->Opcode() == Op_ConvD2L )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvD2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundDouble )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return Type::DOUBLE;
+ const TypeF *tf = t->is_float_constant();
+#ifndef IA64
+ return TypeD::make( (double)tf->getf() );
+#else
+ float x = tf->getf();
+ return TypeD::make( (x == 0.0f) ? (double)x : (double)x + ia64_double_zero );
+#endif
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeInt::INT;
+ const TypeF *tf = t->is_float_constant();
+ return TypeInt::make( SharedRuntime::f2i( tf->getf() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvF2INode::Identity(PhaseTransform *phase) {
+ // Remove ConvF2I->ConvI2F->ConvF2I sequences.
+ if( in(1) ->Opcode() == Op_ConvI2F &&
+ in(1)->in(1)->Opcode() == Op_ConvF2I )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvF2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundFloat )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvF2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeLong::LONG;
+ const TypeF *tf = t->is_float_constant();
+ return TypeLong::make( SharedRuntime::f2l( tf->getf() ) );
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvF2LNode::Identity(PhaseTransform *phase) {
+ // Remove ConvF2L->ConvL2F->ConvF2L sequences.
+ if( in(1) ->Opcode() == Op_ConvL2F &&
+ in(1)->in(1)->Opcode() == Op_ConvF2L )
+ return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If converting to an int type, skip any rounding nodes
+Node *ConvF2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_RoundFloat )
+ set_req(1,in(1)->in(1));
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( ti->is_con() ) return TypeD::make( (double)ti->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( ti->is_con() ) return TypeF::make( (float)ti->get_con() );
+ return bottom_type();
+}
+
+//------------------------------Identity---------------------------------------
+Node *ConvI2FNode::Identity(PhaseTransform *phase) {
+ // Remove ConvI2F->ConvF2I->ConvI2F sequences.
+ if( in(1) ->Opcode() == Op_ConvF2I &&
+ in(1)->in(1)->Opcode() == Op_ConvI2F )
+ return in(1)->in(1);
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvI2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ const Type* tl = TypeLong::make(ti->_lo, ti->_hi, ti->_widen);
+ // Join my declared type against my incoming type.
+ tl = tl->filter(_type);
+ return tl;
+}
+
+#ifdef _LP64
+static inline bool long_ranges_overlap(jlong lo1, jlong hi1,
+ jlong lo2, jlong hi2) {
+ // Two ranges overlap iff one range's low point falls in the other range.
+ return (lo2 <= lo1 && lo1 <= hi2) || (lo1 <= lo2 && lo2 <= hi1);
+}
+#endif
+
+//------------------------------Ideal------------------------------------------
+Node *ConvI2LNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeLong* this_type = this->type()->is_long();
+ Node* this_changed = NULL;
+
+ // If _major_progress, then more loop optimizations follow. Do NOT
+ // remove this node's type assertion until no more loop ops can happen.
+ // The progress bit is set in the major loop optimizations; THEN comes the
+ // call to IterGVN and any chance of hitting this code. Cf. Opaque1Node.
+ if (can_reshape && !phase->C->major_progress()) {
+ const TypeInt* in_type = phase->type(in(1))->isa_int();
+ if (in_type != NULL && this_type != NULL &&
+ (in_type->_lo != this_type->_lo ||
+ in_type->_hi != this_type->_hi)) {
+ // Although this WORSENS the type, it increases GVN opportunities,
+ // because I2L nodes with the same input will common up, regardless
+ // of slightly differing type assertions. Such slight differences
+ // arise routinely as a result of loop unrolling, so this is a
+ // post-unrolling graph cleanup. Choose a type which depends only
+ // on my input. (Exception: Keep a range assertion of >=0 or <0.)
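+ // For example, if this node is typed long:[3..10] but its input is typed
+ // int:[0..100], the new type becomes long:[0..100]: only the input range
+ // and the >=0 assertion survive, so it can common up with other I2L's
+ // of the same input.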
+ jlong lo1 = this_type->_lo;
+ jlong hi1 = this_type->_hi;
+ int w1 = this_type->_widen;
+ if (lo1 != (jint)lo1 ||
+ hi1 != (jint)hi1 ||
+ lo1 > hi1) {
+ // Overflow leads to wraparound, wraparound leads to range saturation.
+ lo1 = min_jint; hi1 = max_jint;
+ } else if (lo1 >= 0) {
+ // Keep a range assertion of >=0.
+ lo1 = 0; hi1 = max_jint;
+ } else if (hi1 < 0) {
+ // Keep a range assertion of <0.
+ lo1 = min_jint; hi1 = -1;
+ } else {
+ lo1 = min_jint; hi1 = max_jint;
+ }
+ const TypeLong* wtype = TypeLong::make(MAX2((jlong)in_type->_lo, lo1),
+ MIN2((jlong)in_type->_hi, hi1),
+ MAX2((int)in_type->_widen, w1));
+ if (wtype != type()) {
+ set_type(wtype);
+ // Note: this_type still has old type value, for the logic below.
+ this_changed = this;
+ }
+ }
+ }
+
+#ifdef _LP64
+ // Convert ConvI2L(AddI(x, y)) to AddL(ConvI2L(x), ConvI2L(y)),
+ // but only if x and y have subranges that cannot cause 32-bit overflow,
+ // under the assumption that x+y is in my own subrange this->type().
+
+ // This assumption is based on a constraint (i.e., type assertion)
+ // established in Parse::array_addressing or perhaps elsewhere.
+ // This constraint has been adjoined to the "natural" type of
+ // the incoming argument in(1). We know (because of runtime
+ // checks) that the result value I2L(x+y) is in the joined range.
+ // Hence we can restrict the incoming terms (x, y) to values such
+ // that their sum also lands in that range.
+
+ // This optimization is useful only on 64-bit systems, where we hope
+ // the addition will end up subsumed in an addressing mode.
+ // It is necessary to do this when optimizing an unrolled array
+ // copy loop such as x[i++] = y[i++].
+
+ // On 32-bit systems, it's better to perform as much 32-bit math as
+ // possible before the I2L conversion, because 32-bit math is cheaper.
+ // There's no common reason to "leak" a constant offset through the I2L.
+ // Addressing arithmetic will not absorb it as part of a 64-bit AddL.
+
+ Node* z = in(1);
+ int op = z->Opcode();
+ if (op == Op_AddI || op == Op_SubI) {
+ Node* x = z->in(1);
+ Node* y = z->in(2);
+ assert (x != z && y != z, "dead loop in ConvI2LNode::Ideal");
+ if (phase->type(x) == Type::TOP) return this_changed;
+ if (phase->type(y) == Type::TOP) return this_changed;
+ const TypeInt* tx = phase->type(x)->is_int();
+ const TypeInt* ty = phase->type(y)->is_int();
+ const TypeLong* tz = this_type;
+ jlong xlo = tx->_lo;
+ jlong xhi = tx->_hi;
+ jlong ylo = ty->_lo;
+ jlong yhi = ty->_hi;
+ jlong zlo = tz->_lo;
+ jlong zhi = tz->_hi;
+ jlong vbit = CONST64(1) << BitsPerInt;
+ int widen = MAX2(tx->_widen, ty->_widen);
+ if (op == Op_SubI) {
+ jlong ylo0 = ylo;
+ ylo = -yhi;
+ yhi = -ylo0;
+ }
+ // See if x+y can cause positive overflow into z+2**32
+ if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo+vbit, zhi+vbit)) {
+ return this_changed;
+ }
+ // See if x+y can cause negative overflow into z-2**32
+ if (long_ranges_overlap(xlo+ylo, xhi+yhi, zlo-vbit, zhi-vbit)) {
+ return this_changed;
+ }
+ // Now it's always safe to assume x+y does not overflow.
+ // This is true even if some pairs x,y might cause overflow, as long
+ // as that overflow value cannot fall into [zlo,zhi].
+
+ // Confident that the arithmetic is "as if infinite precision",
+ // we can now use z's range to put constraints on those of x and y.
+ // The "natural" range of x [xlo,xhi] can perhaps be narrowed to a
+ // more "restricted" range by intersecting [xlo,xhi] with the
+ // range obtained by subtracting y's range from the asserted range
+ // of the I2L conversion. Here's the interval arithmetic algebra:
+ // x == z-y == [zlo,zhi]-[ylo,yhi] == [zlo,zhi]+[-yhi,-ylo]
+ // => x in [zlo-yhi, zhi-ylo]
+ // => x in [zlo-yhi, zhi-ylo] INTERSECT [xlo,xhi]
+ // => x in [xlo MAX zlo-yhi, xhi MIN zhi-ylo]
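+ // For example, with x and y in int:[0..100] and z asserted to long:[0..50]:
+ //   rxlo = MAX2(0, 0-100) = 0    rxhi = MIN2(100, 50-0) = 50
+ // so both converted operands get the narrower type [0..50] on the 64-bit add.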
+ jlong rxlo = MAX2(xlo, zlo - yhi);
+ jlong rxhi = MIN2(xhi, zhi - ylo);
+ // And similarly, x changing place with y:
+ jlong rylo = MAX2(ylo, zlo - xhi);
+ jlong ryhi = MIN2(yhi, zhi - xlo);
+ if (rxlo > rxhi || rylo > ryhi) {
+ return this_changed; // x or y is dying; don't mess w/ it
+ }
+ if (op == Op_SubI) {
+ jlong rylo0 = rylo;
+ rylo = -ryhi;
+ ryhi = -rylo0;
+ }
+
+ Node* cx = phase->transform( new (phase->C, 2) ConvI2LNode(x, TypeLong::make(rxlo, rxhi, widen)) );
+ Node* cy = phase->transform( new (phase->C, 2) ConvI2LNode(y, TypeLong::make(rylo, ryhi, widen)) );
+ switch (op) {
+ case Op_AddI: return new (phase->C, 3) AddLNode(cx, cy);
+ case Op_SubI: return new (phase->C, 3) SubLNode(cx, cy);
+ default: ShouldNotReachHere();
+ }
+ }
+#endif //_LP64
+
+ return this_changed;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvL2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( tl->is_con() ) return TypeD::make( (double)tl->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ConvL2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( tl->is_con() ) return TypeF::make( (float)tl->get_con() );
+ return bottom_type();
+}
+
+//=============================================================================
+//----------------------------Identity-----------------------------------------
+Node *ConvL2INode::Identity( PhaseTransform *phase ) {
+ // Convert L2I(I2L(x)) => x
+ if (in(1)->Opcode() == Op_ConvI2L) return in(1)->in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ConvL2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if (tl->is_con())
+ // Easy case.
+ return TypeInt::make((jint)tl->get_con());
+ return bottom_type();
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Blow off prior masking to int
+Node *ConvL2INode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *andl = in(1);
+ uint andl_op = andl->Opcode();
+ if( andl_op == Op_AndL ) {
+ // Blow off prior masking to int
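+ // e.g.  ConvL2I( AndL(x, 0xFFFFFFFF) )  ==>  ConvL2I(x)
+ // since the conversion keeps only the low 32 bits anyway.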
+ if( phase->type(andl->in(2)) == TypeLong::make( 0xFFFFFFFF ) ) {
+ set_req(1,andl->in(1));
+ return this;
+ }
+ }
+
+ // Swap with a prior add: convL2I(addL(x,y)) ==> addI(convL2I(x),convL2I(y))
+ // This replaces an 'AddL' with an 'AddI'.
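+ // This is legal because the low 32 bits of a long sum depend only on the
+ // low 32 bits of its operands, so narrowing the add cannot change the result.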
+ if( andl_op == Op_AddL ) {
+ // Don't do this for nodes which have more than one user since
+ // we'll end up computing the long add anyway.
+ if (andl->outcnt() > 1) return NULL;
+
+ Node* x = andl->in(1);
+ Node* y = andl->in(2);
+ assert( x != andl && y != andl, "dead loop in ConvL2INode::Ideal" );
+ if (phase->type(x) == Type::TOP) return NULL;
+ if (phase->type(y) == Type::TOP) return NULL;
+ Node *add1 = phase->transform(new (phase->C, 2) ConvL2INode(x));
+ Node *add2 = phase->transform(new (phase->C, 2) ConvL2INode(y));
+ return new (phase->C, 3) AddINode(add1,add2);
+ }
+
+ // Fold up with a prior LoadL: LoadL->ConvL2I ==> LoadI
+ // Requires that we understand the 'endianness' of Longs.
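+ // e.g. on a little-endian machine  ConvL2I( LoadL(mem,adr) )  ==>  LoadI(mem,adr)
+ // since the low word of the long lives at the same address as the long itself.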
+ if( andl_op == Op_LoadL ) {
+ Node *adr = andl->in(MemNode::Address);
+ // VM_LITTLE_ENDIAN is #defined appropriately in the Makefiles
+#ifndef VM_LITTLE_ENDIAN
+ // The transformation can cause problems on BIG_ENDIAN architectures
+ // where the jint is not at the same address as the jlong. Specifically, we
+ // will fail to insert an anti-dependence in GCM between the LoadI and a
+ // subsequent StoreL because different memory offsets provoke
+ // flatten_alias_type() into indicating two different types. See bug
+ // 4755222.
+
+ // Node *base = adr->is_AddP() ? adr->in(AddPNode::Base) : adr;
+ // adr = phase->transform( new (phase->C, 4) AddPNode(base,adr,phase->MakeConX(sizeof(jint))));
+ return NULL;
+#else
+ if (phase->C->alias_type(andl->adr_type())->is_volatile()) {
+ // Picking up the low half by itself bypasses the atomic load and we could
+ // end up with more than one non-atomic load. See bugs 4432655 and 4526490.
+ // We could go to the trouble of iterating over andl's output edges and
+ // punting only if there's more than one real use, but we don't bother.
+ return NULL;
+ }
+ return new (phase->C, 3) LoadINode(andl->in(MemNode::Control),andl->in(MemNode::Memory),adr,((LoadLNode*)andl)->raw_adr_type());
+#endif
+ }
+
+ return NULL;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *CastX2PNode::Value( PhaseTransform *phase ) const {
+ const Type* t = phase->type(in(1));
+ if (t->base() == Type_X && t->singleton()) {
+ uintptr_t bits = (uintptr_t) t->is_intptr_t()->get_con();
+ if (bits == 0) return TypePtr::NULL_PTR;
+ return TypeRawPtr::make((address) bits);
+ }
+ return CastX2PNode::bottom_type();
+}
+
+//------------------------------Idealize---------------------------------------
+static inline bool fits_in_int(const Type* t, bool but_not_min_int = false) {
+ if (t == Type::TOP) return false;
+ const TypeX* tl = t->is_intptr_t();
+ jint lo = min_jint;
+ jint hi = max_jint;
+ if (but_not_min_int) ++lo; // caller wants to negate the value w/o overflow
+ return (tl->_lo >= lo) && (tl->_hi <= hi);
+}
+
+static inline Node* addP_of_X2P(PhaseGVN *phase,
+ Node* base,
+ Node* dispX,
+ bool negate = false) {
+ if (negate) {
+ dispX = new (phase->C, 3) SubXNode(phase->MakeConX(0), phase->transform(dispX));
+ }
+ return new (phase->C, 4) AddPNode(phase->C->top(),
+ phase->transform(new (phase->C, 2) CastX2PNode(base)),
+ phase->transform(dispX));
+}
+
+Node *CastX2PNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // convert CastX2P(AddX(x, y)) to AddP(CastX2P(x), y) if y fits in an int
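+ // Similarly, CastX2P(SubX(x, y)) becomes AddP(CastX2P(x), SubX(0, y))
+ // when y's value fits in an int and cannot be min_jint, so the negation
+ // cannot overflow an int.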
+ int op = in(1)->Opcode();
+ Node* x;
+ Node* y;
+ switch (op) {
+ case Op_SubX:
+ x = in(1)->in(1);
+ y = in(1)->in(2);
+ if (fits_in_int(phase->type(y), true)) {
+ return addP_of_X2P(phase, x, y, true);
+ }
+ break;
+ case Op_AddX:
+ x = in(1)->in(1);
+ y = in(1)->in(2);
+ if (fits_in_int(phase->type(y))) {
+ return addP_of_X2P(phase, x, y);
+ }
+ if (fits_in_int(phase->type(x))) {
+ return addP_of_X2P(phase, y, x);
+ }
+ break;
+ }
+ return NULL;
+}
+
+//------------------------------Identity---------------------------------------
+Node *CastX2PNode::Identity( PhaseTransform *phase ) {
+ if (in(1)->Opcode() == Op_CastP2X) return in(1)->in(1);
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *CastP2XNode::Value( PhaseTransform *phase ) const {
+ const Type* t = phase->type(in(1));
+ if (t->base() == Type::RawPtr && t->singleton()) {
+ uintptr_t bits = (uintptr_t) t->is_rawptr()->get_con();
+ return TypeX::make(bits);
+ }
+ return CastP2XNode::bottom_type();
+}
+
+Node *CastP2XNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return (in(0) && remove_dead_region(phase, can_reshape)) ? this : NULL;
+}
+
+//------------------------------Identity---------------------------------------
+Node *CastP2XNode::Identity( PhaseTransform *phase ) {
+ if (in(1)->Opcode() == Op_CastX2P) return in(1)->in(1);
+ return this;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Remove redundant roundings
+Node *RoundFloatNode::Identity( PhaseTransform *phase ) {
+ assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel");
+ // Do not round constants
+ if (phase->type(in(1))->base() == Type::FloatCon) return in(1);
+ int op = in(1)->Opcode();
+ // Redundant rounding
+ if( op == Op_RoundFloat ) return in(1);
+ // Already rounded
+ if( op == Op_Parm ) return in(1);
+ if( op == Op_LoadF ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *RoundFloatNode::Value( PhaseTransform *phase ) const {
+ return phase->type( in(1) );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// Remove redundant roundings. Incoming arguments are already rounded.
+Node *RoundDoubleNode::Identity( PhaseTransform *phase ) {
+ assert(Matcher::strict_fp_requires_explicit_rounding, "should only generate for Intel");
+ // Do not round constants
+ if (phase->type(in(1))->base() == Type::DoubleCon) return in(1);
+ int op = in(1)->Opcode();
+ // Redundant rounding
+ if( op == Op_RoundDouble ) return in(1);
+ // Already rounded
+ if( op == Op_Parm ) return in(1);
+ if( op == Op_LoadD ) return in(1);
+ if( op == Op_ConvF2D ) return in(1);
+ if( op == Op_ConvI2D ) return in(1);
+ return this;
+}
+
+//------------------------------Value------------------------------------------
+const Type *RoundDoubleNode::Value( PhaseTransform *phase ) const {
+ return phase->type( in(1) );
+}
+
+
+//=============================================================================
+// Do not allow value-numbering
+uint Opaque1Node::hash() const { return NO_HASH; }
+uint Opaque1Node::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Identity---------------------------------------
+// If _major_progress, then more loop optimizations follow. Do NOT remove
+// the opaque Node until no more loop ops can happen. Note the timing of
+// _major_progress; it's set in the major loop optimizations; THEN comes the
+// call to IterGVN and any chance of hitting this code. Hence there's no
+// phase-ordering problem with stripping Opaque1 in IGVN followed by some
+// more loop optimizations that require it.
+Node *Opaque1Node::Identity( PhaseTransform *phase ) {
+ return phase->C->major_progress() ? this : in(1);
+}
+
+//=============================================================================
+// A node to prevent unwanted optimizations. Allows constant folding. Stops
+// value-numbering, most Ideal calls or Identity functions. This Node is
+// specifically designed to prevent the pre-increment value of a loop trip
+// counter from being live out of the bottom of the loop (hence causing the
+// pre- and post-increment values both being live and thus requiring an extra
+// temp register and an extra move). If we "accidentally" optimize through
+// this kind of a Node, we'll get slightly pessimal, but correct, code. Thus
+// it's OK to be slightly sloppy on optimizations here.
+
+// Do not allow value-numbering
+uint Opaque2Node::hash() const { return NO_HASH; }
+uint Opaque2Node::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+
+//------------------------------Value------------------------------------------
+const Type *MoveL2DNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeLong *tl = t->is_long();
+ if( !tl->is_con() ) return bottom_type();
+ JavaValue v;
+ v.set_jlong(tl->get_con());
+ return TypeD::make( v.get_jdouble() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveI2FNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ const TypeInt *ti = t->is_int();
+ if( !ti->is_con() ) return bottom_type();
+ JavaValue v;
+ v.set_jint(ti->get_con());
+ return TypeF::make( v.get_jfloat() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveF2INode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::FLOAT ) return TypeInt::INT;
+ const TypeF *tf = t->is_float_constant();
+ JavaValue v;
+ v.set_jfloat(tf->getf());
+ return TypeInt::make( v.get_jint() );
+}
+
+//------------------------------Value------------------------------------------
+const Type *MoveD2LNode::Value( PhaseTransform *phase ) const {
+ const Type *t = phase->type( in(1) );
+ if( t == Type::TOP ) return Type::TOP;
+ if( t == Type::DOUBLE ) return TypeLong::LONG;
+ const TypeD *td = t->is_double_constant();
+ JavaValue v;
+ v.set_jdouble(td->getd());
+ return TypeLong::make( v.get_jlong() );
+}
diff --git a/src/share/vm/opto/connode.hpp b/src/share/vm/opto/connode.hpp
new file mode 100644
index 000000000..1c1b96a19
--- /dev/null
+++ b/src/share/vm/opto/connode.hpp
@@ -0,0 +1,578 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class PhaseTransform;
+class MachNode;
+
+//------------------------------ConNode----------------------------------------
+// Simple constants
+class ConNode : public TypeNode {
+public:
+ ConNode( const Type *t ) : TypeNode(t,1) {
+ init_req(0, (Node*)Compile::current()->root());
+ init_flags(Flag_is_Con);
+ }
+ virtual int Opcode() const;
+ virtual uint hash() const;
+ virtual const RegMask &out_RegMask() const { return RegMask::Empty; }
+ virtual const RegMask &in_RegMask(uint) const { return RegMask::Empty; }
+
+ // Polymorphic factory method:
+ static ConNode* make( Compile* C, const Type *t );
+};
+
+//------------------------------ConINode---------------------------------------
+// Simple integer constants
+class ConINode : public ConNode {
+public:
+ ConINode( const TypeInt *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConINode* make( Compile* C, int con ) {
+ return new (C, 1) ConINode( TypeInt::make(con) );
+ }
+
+};
+
+//------------------------------ConPNode---------------------------------------
+// Simple pointer constants
+class ConPNode : public ConNode {
+public:
+ ConPNode( const TypePtr *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory methods:
+ static ConPNode* make( Compile *C, address con ) {
+ if (con == NULL)
+ return new (C, 1) ConPNode( TypePtr::NULL_PTR ) ;
+ else
+ return new (C, 1) ConPNode( TypeRawPtr::make(con) );
+ }
+
+ static ConPNode* make( Compile *C, ciObject* con ) {
+ return new (C, 1) ConPNode( TypeOopPtr::make_from_constant(con) );
+ }
+
+};
+
+
+//------------------------------ConLNode---------------------------------------
+// Simple long constants
+class ConLNode : public ConNode {
+public:
+ ConLNode( const TypeLong *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConLNode* make( Compile *C, jlong con ) {
+ return new (C, 1) ConLNode( TypeLong::make(con) );
+ }
+
+};
+
+//------------------------------ConFNode---------------------------------------
+// Simple float constants
+class ConFNode : public ConNode {
+public:
+ ConFNode( const TypeF *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConFNode* make( Compile *C, float con ) {
+ return new (C, 1) ConFNode( TypeF::make(con) );
+ }
+
+};
+
+//------------------------------ConDNode---------------------------------------
+// Simple double constants
+class ConDNode : public ConNode {
+public:
+ ConDNode( const TypeD *t ) : ConNode(t) {}
+ virtual int Opcode() const;
+
+ // Factory method:
+ static ConDNode* make( Compile *C, double con ) {
+ return new (C, 1) ConDNode( TypeD::make(con) );
+ }
+
+};
+
+//------------------------------BinaryNode-------------------------------------
+// Placeholder for the 2 conditional inputs to a CMove. CMove needs 4
+// inputs: the Bool (for the lt/gt/eq/ne bits), the flags (result of some
+// compare), and the 2 values to select between. The Matcher requires a
+// binary tree so we break it down like this:
+// (CMove (Binary bol cmp) (Binary src1 src2))
+class BinaryNode : public Node {
+public:
+ BinaryNode( Node *n1, Node *n2 ) : Node(0,n1,n2) { }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return 0; }
+};
+
+//------------------------------CMoveNode--------------------------------------
+// Conditional move
+class CMoveNode : public TypeNode {
+public:
+ enum { Control, // When is it safe to do this cmove?
+ Condition, // Condition controlling the cmove
+ IfFalse, // Value if condition is false
+ IfTrue }; // Value if condition is true
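+ // Computes (Condition ? IfTrue : IfFalse) as a data value, with no branch.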
+ CMoveNode( Node *bol, Node *left, Node *right, const Type *t ) : TypeNode(t,4)
+ {
+ init_class_id(Class_CMove);
+ // all inputs are nullified in Node::Node(int)
+ // init_req(Control,NULL);
+ init_req(Condition,bol);
+ init_req(IfFalse,left);
+ init_req(IfTrue,right);
+ }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ static CMoveNode *make( Compile *C, Node *c, Node *bol, Node *left, Node *right, const Type *t );
+ // Helper function to spot cmove graph shapes
+ static Node *is_cmove_id( PhaseTransform *phase, Node *cmp, Node *t, Node *f, BoolNode *b );
+};
+
+//------------------------------CMoveDNode-------------------------------------
+class CMoveDNode : public CMoveNode {
+public:
+ CMoveDNode( Node *bol, Node *left, Node *right, const Type* t) : CMoveNode(bol,left,right,t){}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveFNode-------------------------------------
+class CMoveFNode : public CMoveNode {
+public:
+ CMoveFNode( Node *bol, Node *left, Node *right, const Type* t ) : CMoveNode(bol,left,right,t) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveINode-------------------------------------
+class CMoveINode : public CMoveNode {
+public:
+ CMoveINode( Node *bol, Node *left, Node *right, const TypeInt *ti ) : CMoveNode(bol,left,right,ti){}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CMoveLNode-------------------------------------
+class CMoveLNode : public CMoveNode {
+public:
+ CMoveLNode(Node *bol, Node *left, Node *right, const TypeLong *tl ) : CMoveNode(bol,left,right,tl){}
+ virtual int Opcode() const;
+};
+
+//------------------------------CMovePNode-------------------------------------
+class CMovePNode : public CMoveNode {
+public:
+ CMovePNode( Node *c, Node *bol, Node *left, Node *right, const TypePtr* t ) : CMoveNode(bol,left,right,t) { init_req(Control,c); }
+ virtual int Opcode() const;
+};
+
+//------------------------------ConstraintCastNode-------------------------------------
+// cast to a different range
+class ConstraintCastNode: public TypeNode {
+public:
+ ConstraintCastNode (Node *n, const Type *t ): TypeNode(t,2) {
+ init_class_id(Class_ConstraintCast);
+ init_req(1, n);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const = 0;
+ virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------CastIINode-------------------------------------
+// cast integer to integer (different range)
+class CastIINode: public ConstraintCastNode {
+public:
+ CastIINode (Node *n, const Type *t ): ConstraintCastNode(n,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CastPPNode-------------------------------------
+// cast pointer to pointer (different type)
+class CastPPNode: public ConstraintCastNode {
+public:
+ CastPPNode (Node *n, const Type *t ): ConstraintCastNode(n, t) {
+ // Only CastPP is safe. CastII can cause optimizer loops.
+ init_flags(Flag_is_dead_loop_safe);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------CheckCastPPNode--------------------------------
+// for _checkcast, cast pointer to pointer (different type), without JOIN.
+class CheckCastPPNode: public TypeNode {
+public:
+ CheckCastPPNode( Node *c, Node *n, const Type *t ) : TypeNode(t,2) {
+ init_class_id(Class_CheckCastPP);
+ init_flags(Flag_is_dead_loop_safe);
+ init_req(0, c);
+ init_req(1, n);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ // No longer remove CheckCast after CCP as it gives me a place to hang
+ // the proper address type - which is required to compute anti-deps.
+ //virtual Node *Ideal_DU_postCCP( PhaseCCP * );
+};
+
+//------------------------------Conv2BNode-------------------------------------
+// Convert int/pointer to a Boolean. Map zero to zero, all else to 1.
+class Conv2BNode : public Node {
+public:
+ Conv2BNode( Node *i ) : Node(0,i) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+// The conversion operations are all Alpha sorted. Please keep it that way!
+//------------------------------ConvD2FNode------------------------------------
+// Convert double to float
+class ConvD2FNode : public Node {
+public:
+ ConvD2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvD2INode------------------------------------
+// Convert Double to Integer
+class ConvD2INode : public Node {
+public:
+ ConvD2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ConvD2LNode------------------------------------
+// Convert Double to Long
+class ConvD2LNode : public Node {
+public:
+ ConvD2LNode( Node *dbl ) : Node(0,dbl) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvF2DNode------------------------------------
+// Convert Float to a Double.
+class ConvF2DNode : public Node {
+public:
+ ConvF2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvF2INode------------------------------------
+// Convert float to integer
+class ConvF2INode : public Node {
+public:
+ ConvF2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ConvF2LNode------------------------------------
+// Convert float to long
+class ConvF2LNode : public Node {
+public:
+ ConvF2LNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvI2DNode------------------------------------
+// Convert Integer to Double
+class ConvI2DNode : public Node {
+public:
+ ConvI2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvI2FNode------------------------------------
+// Convert Integer to Float
+class ConvI2FNode : public Node {
+public:
+ ConvI2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvI2LNode------------------------------------
+// Convert integer to long
+class ConvI2LNode : public TypeNode {
+public:
+ ConvI2LNode(Node *in1, const TypeLong* t = TypeLong::INT)
+ : TypeNode(t, 2)
+ { init_req(1, in1); }
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ConvL2DNode------------------------------------
+// Convert Long to Double
+class ConvL2DNode : public Node {
+public:
+ ConvL2DNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ConvL2FNode------------------------------------
+// Convert Long to Float
+class ConvL2FNode : public Node {
+public:
+ ConvL2FNode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ConvL2INode------------------------------------
+// Convert long to integer
+class ConvL2INode : public Node {
+public:
+ ConvL2INode( Node *in1 ) : Node(0,in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CastX2PNode-------------------------------------
+// convert a machine-pointer-sized integer to a raw pointer
+class CastX2PNode : public Node {
+public:
+ CastX2PNode( Node *n ) : Node(NULL, n) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+};
+
+//------------------------------CastP2XNode-------------------------------------
+// Used in both 32-bit and 64-bit land.
+// Used for card-marks and unsafe pointer math.
+class CastP2XNode : public Node {
+public:
+ CastP2XNode( Node *ctrl, Node *n ) : Node(ctrl, n) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual uint ideal_reg() const { return Op_RegX; }
+ virtual const Type *bottom_type() const { return TypeX_X; }
+ // Return false to keep node from moving away from an associated card mark.
+ virtual bool depends_only_on_test() const { return false; }
+};
+
+//------------------------------MemMoveNode------------------------------------
+// Memory to memory move. Inserted very late, after allocation.
+class MemMoveNode : public Node {
+public:
+ MemMoveNode( Node *dst, Node *src ) : Node(0,dst,src) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------ThreadLocalNode--------------------------------
+// Ideal Node which returns the base of ThreadLocalStorage.
+class ThreadLocalNode : public Node {
+public:
+ ThreadLocalNode( ) : Node((Node*)Compile::current()->root()) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM;}
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------LoadReturnPCNode-------------------------------
+class LoadReturnPCNode: public Node {
+public:
+ LoadReturnPCNode(Node *c) : Node(c) { }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+
+//-----------------------------RoundFloatNode----------------------------------
+class RoundFloatNode: public Node {
+public:
+ RoundFloatNode(Node* c, Node *in1): Node(c, in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//-----------------------------RoundDoubleNode---------------------------------
+class RoundDoubleNode: public Node {
+public:
+ RoundDoubleNode(Node* c, Node *in1): Node(c, in1) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------Opaque1Node------------------------------------
+// A node to prevent unwanted optimizations. Allows constant folding.
+// Stops value-numbering, Ideal calls or Identity functions.
+class Opaque1Node : public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const;
+public:
+ Opaque1Node( Node *n ) : Node(0,n) {}
+ // Special version for the pre-loop to hold the original loop limit
+ // which is consumed by range check elimination.
+ Opaque1Node( Node *n, Node* orig_limit ) : Node(0,n,orig_limit) {}
+ Node* original_loop_limit() { return req()==3 ? in(2) : NULL; }
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+//------------------------------Opaque2Node------------------------------------
+// A node to prevent unwanted optimizations. Allows constant folding. Stops
+// value-numbering, most Ideal calls or Identity functions. This Node is
+// specifically designed to prevent the pre-increment value of a loop trip
+// counter from being live out of the bottom of the loop (which would cause the
+// pre- and post-increment values both to be live, requiring an extra
+// temp register and an extra move). If we "accidentally" optimize through
+// this kind of Node, we'll get slightly pessimal, but correct, code. Thus
+// it's OK to be slightly sloppy on optimizations here.
+class Opaque2Node : public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const;
+public:
+ Opaque2Node( Node *n ) : Node(0,n) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+};
+
+//----------------------PartialSubtypeCheckNode--------------------------------
+// The second, slow half of a subtype check. Scans the subklass's secondary
+// superklass array for an instance of the superklass. Sets a hidden internal
+// cache on a hit (the cache is checked with exposed code in gen_subtype_check()).
+// Returns non-zero for a miss and zero for a hit.
+class PartialSubtypeCheckNode : public Node {
+public:
+ PartialSubtypeCheckNode(Node* c, Node* sub, Node* super) : Node(c,sub,super) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+};
+
+//------------------------------MoveI2FNode------------------------------------
+class MoveI2FNode : public Node {
+ public:
+ MoveI2FNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveL2DNode : public Node {
+ public:
+ MoveL2DNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveF2INode : public Node {
+ public:
+ MoveF2INode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
+
+class MoveD2LNode : public Node {
+ public:
+ MoveD2LNode( Node *value ) : Node(0,value) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+ virtual const Type* Value( PhaseTransform *phase ) const;
+};
diff --git a/src/share/vm/opto/divnode.cpp b/src/share/vm/opto/divnode.cpp
new file mode 100644
index 000000000..5443495ff
--- /dev/null
+++ b/src/share/vm/opto/divnode.cpp
@@ -0,0 +1,1031 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_divnode.cpp.incl"
+#include <math.h>
+
+// Implement the integer constant divide -> long multiply transform found in
+// "Division by Invariant Integers using Multiplication"
+// by Granlund and Montgomery
+static Node *transform_int_divide_to_long_multiply( PhaseGVN *phase, Node *dividend, int divisor ) {
+
+ // Check for invalid divisors
+ assert( divisor != 0 && divisor != min_jint && divisor != 1,
+ "bad divisor for transforming to long multiply" );
+
+ // Compute l = ceiling(log2(d))
+ // presumes d is more likely small
+ bool d_pos = divisor >= 0;
+ int d = d_pos ? divisor : -divisor;
+ unsigned ud = (unsigned)d;
+ const int N = 32;
+ int l = log2_intptr(d-1)+1;
+ int sh_post = l;
+
+ const uint64_t U1 = (uint64_t)1;
+
+ // Cliff pointed out how to prevent overflow (from the paper)
+ uint64_t m_low = (((U1 << l) - ud) << N) / ud + (U1 << N);
+ uint64_t m_high = ((((U1 << l) - ud) << N) + (U1 << (l+1))) / ud + (U1 << N);
+
+ // Reduce to lowest terms
+ for ( ; sh_post > 0; sh_post-- ) {
+ uint64_t m_low_1 = m_low >> 1;
+ uint64_t m_high_1 = m_high >> 1;
+ if ( m_low_1 >= m_high_1 )
+ break;
+ m_low = m_low_1;
+ m_high = m_high_1;
+ }
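+ // Illustrative example (not in the original source): for divisor d = 7,
+ // l = 3, so m_low = (2^32)/7 + 2^32 and m_high = (2^32 + 16)/7 + 2^32.
+ // The loop above halves both until they would collide, leaving
+ // m_high = 0x92492493 and sh_post = 2 -- the familiar signed divide-by-7
+ // magic constant. Since this m_high is >= 2^31, the overflow-corrected
+ // branch at the bottom of this function is the one taken.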
+
+ // Result
+ Node *q;
+
+ // division by +/- 1
+ if (d == 1) {
+ // Filtered out as identity above
+ if (d_pos)
+ return NULL;
+
+ // Just negate the value
+ else {
+ q = new (phase->C, 3) SubINode(phase->intcon(0), dividend);
+ }
+ }
+
+ // division by +/- a power of 2
+ else if ( is_power_of_2(d) ) {
+
+ // See if we can simply do a shift without rounding
+ bool needs_rounding = true;
+ const Type *dt = phase->type(dividend);
+ const TypeInt *dti = dt->isa_int();
+
+ // we don't need to round a positive dividend
+ if (dti && dti->_lo >= 0)
+ needs_rounding = false;
+
+ // An AND mask of sufficient size clears the low bits and
+ // I can avoid rounding.
+ else if( dividend->Opcode() == Op_AndI ) {
+ const TypeInt *andconi = phase->type( dividend->in(2) )->isa_int();
+ if( andconi && andconi->is_con(-d) ) {
+ dividend = dividend->in(1);
+ needs_rounding = false;
+ }
+ }
+
+ // Add rounding to the shift to handle the sign bit
+ if( needs_rounding ) {
+ Node *t1 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(l - 1)));
+ Node *t2 = phase->transform(new (phase->C, 3) URShiftINode(t1, phase->intcon(N - l)));
+ dividend = phase->transform(new (phase->C, 3) AddINode(dividend, t2));
+ }
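+ // Illustrative (not in the original source): for d = 4 (l = 2) and a
+ // dividend of -7, t1 = -7 >> 1 = -4, t2 = (unsigned)-4 >> 30 = 3, so the
+ // adjusted dividend is -4 and the shift below yields -4 >> 2 = -1,
+ // matching Java's truncate-toward-zero result for -7 / 4.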
+
+ q = new (phase->C, 3) RShiftINode(dividend, phase->intcon(l));
+
+ if (!d_pos)
+ q = new (phase->C, 3) SubINode(phase->intcon(0), phase->transform(q));
+ }
+
+ // division by something else
+ else if (m_high < (U1 << (N-1))) {
+ Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend));
+ Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high)));
+ Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(sh_post+N)));
+ Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3));
+ Node *t5 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1)));
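+ // Added commentary (not in the original): t5 is 0 for a non-negative
+ // dividend and -1 otherwise, so for a positive divisor the SubI below
+ // computes t4 - t5, adding back the +1 sign correction the multiply-and-
+ // shift scheme needs for negative dividends; the operands are swapped to
+ // negate the result when the divisor is negative.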
+
+ q = new (phase->C, 3) SubINode(d_pos ? t4 : t5, d_pos ? t5 : t4);
+ }
+
+ // This handles the case where m_high is >= 2**(N-1). In that case,
+ // we subtract out 2**N from the multiply and add it back in later as
+ // "dividend" in the equation (t5). This case computes the same result
+ // as the immediately preceding case, except that rounding and overflow
+ // are accounted for.
+ else {
+ Node *t1 = phase->transform(new (phase->C, 2) ConvI2LNode(dividend));
+ Node *t2 = phase->transform(new (phase->C, 3) MulLNode(t1, phase->longcon(m_high - (U1 << N))));
+ Node *t3 = phase->transform(new (phase->C, 3) RShiftLNode(t2, phase->intcon(N)));
+ Node *t4 = phase->transform(new (phase->C, 2) ConvL2INode(t3));
+ Node *t5 = phase->transform(new (phase->C, 3) AddINode(dividend, t4));
+ Node *t6 = phase->transform(new (phase->C, 3) RShiftINode(t5, phase->intcon(sh_post)));
+ Node *t7 = phase->transform(new (phase->C, 3) RShiftINode(dividend, phase->intcon(N-1)));
+
+ q = new (phase->C, 3) SubINode(d_pos ? t6 : t7, d_pos ? t7 : t6);
+ }
+
+ return (q);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivINode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) )->higher_equal(TypeInt::ONE)) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Divides can be changed to multiplies and/or shifts
+Node *DivINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t = phase->type( in(2) );
+ if( t == TypeInt::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeInt *ti = t->isa_int();
+ if( !ti ) return NULL;
+ if( !ti->is_con() ) return NULL;
+ int i = ti->get_con(); // Get divisor
+
+ if (i == 0) return NULL; // Dividing by zero constant does not idealize
+
+ set_req(0,NULL); // Dividing by a not-zero constant; no faulting
+
+ // Dividing by MININT does not optimize as a power-of-2 shift.
+ if( i == min_jint ) return NULL;
+
+ return transform_int_divide_to_long_multiply( phase, in(1), i );
+}
+
+//------------------------------Value------------------------------------------
+// A DivINode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivINode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // x/x == 1 since we always generate the dynamic divisor check for 0.
+ if( phase->eqv( in(1), in(2) ) )
+ return TypeInt::ONE;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Divide the two numbers. We approximate.
+ // If divisor is a constant and not zero
+ const TypeInt *i1 = t1->is_int();
+ const TypeInt *i2 = t2->is_int();
+ int widen = MAX2(i1->_widen, i2->_widen);
+
+ if( i2->is_con() && i2->get_con() != 0 ) {
+ int32 d = i2->get_con(); // Divisor
+ jint lo, hi;
+ if( d >= 0 ) {
+ lo = i1->_lo/d;
+ hi = i1->_hi/d;
+ } else {
+ if( d == -1 && i1->_lo == min_jint ) {
+ // 'min_jint/-1' throws arithmetic exception during compilation
+ lo = min_jint;
+ // do not support holes, 'hi' must go to either min_jint or max_jint:
+ // [min_jint, -10]/[-1,-1] ==> [min_jint] UNION [10,max_jint]
+ hi = i1->_hi == min_jint ? min_jint : max_jint;
+ } else {
+ lo = i1->_hi/d;
+ hi = i1->_lo/d;
+ }
+ }
+ return TypeInt::make(lo, hi, widen);
+ }
+
+ // If the dividend is a constant
+ if( i1->is_con() ) {
+ int32 d = i1->get_con();
+ if( d < 0 ) {
+ if( d == min_jint ) {
+ // (-min_jint) == min_jint == (min_jint / -1)
+ return TypeInt::make(min_jint, max_jint/2 + 1, widen);
+ } else {
+ return TypeInt::make(d, -d, widen);
+ }
+ }
+ return TypeInt::make(-d, d, widen);
+ }
+
+ // Otherwise we give up all hope
+ return TypeInt::INT;
+}
+
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivLNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) )->higher_equal(TypeLong::ONE)) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Dividing by a power of 2 is a shift.
+Node *DivLNode::Ideal( PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t = phase->type( in(2) );
+ if( t == TypeLong::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeLong *ti = t->isa_long();
+ if( !ti ) return NULL;
+ if( !ti->is_con() ) return NULL;
+ jlong i = ti->get_con(); // Get divisor
+ if( i ) set_req(0, NULL); // Dividing by a not-zero constant; no faulting
+
+ // Dividing by MININT does not optimize as a power-of-2 shift.
+ if( i == min_jlong ) return NULL;
+
+ // Check for negative power of 2 divisor, if so, negate it and set a flag
+ // to indicate result needs to be negated. Note that negating the dividend
+ // here does not work when it has the value MININT
+ Node *dividend = in(1);
+ bool negate_res = false;
+ if (is_power_of_2_long(-i)) {
+ i = -i; // Flip divisor
+ negate_res = true;
+ }
+
+ // Check for power of 2
+ if (!is_power_of_2_long(i)) // Is divisor a power of 2?
+ return NULL; // Not a power of 2
+
+ // Compute number of bits to shift
+ int log_i = log2_long(i);
+
+ // See if we can simply do a shift without rounding
+ bool needs_rounding = true;
+ const Type *dt = phase->type(dividend);
+ const TypeLong *dtl = dt->isa_long();
+
+ if (dtl && dtl->_lo > 0) {
+ // we don't need to round a positive dividend
+ needs_rounding = false;
+ } else if( dividend->Opcode() == Op_AndL ) {
+ // An AND mask of sufficient size clears the low bits and
+ // I can avoid rounding.
+ const TypeLong *andconi = phase->type( dividend->in(2) )->isa_long();
+ if( andconi &&
+ andconi->is_con() &&
+ andconi->get_con() == -i ) {
+ dividend = dividend->in(1);
+ needs_rounding = false;
+ }
+ }
+
+ if (!needs_rounding) {
+ Node *result = new (phase->C, 3) RShiftLNode(dividend, phase->intcon(log_i));
+ if (negate_res) {
+ result = phase->transform(result);
+ result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
+ }
+ return result;
+ }
+
+ // Divide-by-power-of-2 can be made into a shift, but you have to do
+ // more math for the rounding. You need to add 0 for positive
+ // numbers, and "i-1" for negative numbers. Example: i=4, so the
+ // shift is by 2. You need to add 3 to negative dividends and 0 to
+ // positive ones. So (-7+3)>>2 becomes -1, (-4+3)>>2 becomes -1,
+ // (-2+3)>>2 becomes 0, etc.
+
+ // Compute 0 or -1, based on sign bit
+ Node *sign = phase->transform(new (phase->C, 3) RShiftLNode(dividend,phase->intcon(63)));
+ // Mask the sign down to the rounding constant: 0 for positive, i-1 for negative
+ Node *round = phase->transform(new (phase->C, 3) AndLNode(sign,phase->longcon(i-1)));
+ // Round up before shifting
+ Node *sum = phase->transform(new (phase->C, 3) AddLNode(dividend,round));
+ // Shift for division
+ Node *result = new (phase->C, 3) RShiftLNode(sum, phase->intcon(log_i));
+ if (negate_res) {
+ result = phase->transform(result);
+ result = new (phase->C, 3) SubLNode(phase->longcon(0), result);
+ }
+
+ return result;
+}
+
+//------------------------------Value------------------------------------------
+// A DivLNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivLNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // x/x == 1 since we always generate the dynamic divisor check for 0.
+ if( phase->eqv( in(1), in(2) ) )
+ return TypeLong::ONE;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // Divide the two numbers. We approximate.
+ // If divisor is a constant and not zero
+ const TypeLong *i1 = t1->is_long();
+ const TypeLong *i2 = t2->is_long();
+ int widen = MAX2(i1->_widen, i2->_widen);
+
+ if( i2->is_con() && i2->get_con() != 0 ) {
+ jlong d = i2->get_con(); // Divisor
+ jlong lo, hi;
+ if( d >= 0 ) {
+ lo = i1->_lo/d;
+ hi = i1->_hi/d;
+ } else {
+ if( d == CONST64(-1) && i1->_lo == min_jlong ) {
+ // 'min_jlong/-1' throws arithmetic exception during compilation
+ lo = min_jlong;
+ // do not support holes, 'hi' must go to either min_jlong or max_jlong:
+ // [min_jlong, -10]/[-1,-1] ==> [min_jlong] UNION [10,max_jlong]
+ hi = i1->_hi == min_jlong ? min_jlong : max_jlong;
+ } else {
+ lo = i1->_hi/d;
+ hi = i1->_lo/d;
+ }
+ }
+ return TypeLong::make(lo, hi, widen);
+ }
+
+ // If the dividend is a constant
+ if( i1->is_con() ) {
+ jlong d = i1->get_con();
+ if( d < 0 ) {
+ if( d == min_jlong ) {
+ // (-min_jlong) == min_jlong == (min_jlong / -1)
+ return TypeLong::make(min_jlong, max_jlong/2 + 1, widen);
+ } else {
+ return TypeLong::make(d, -d, widen);
+ }
+ }
+ return TypeLong::make(-d, d, widen);
+ }
+
+ // Otherwise we give up all hope
+ return TypeLong::LONG;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A DivFNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivFNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // x/x == 1, we ignore 0/0.
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::FloatCon)
+ if (!g_isnan(t1->getf()) && g_isfinite(t1->getf()) && t1->getf() != 0.0) // could be negative ZERO or NaN
+ return TypeF::ONE;
+
+ if( t2 == TypeF::ONE )
+ return t1;
+
+ // If the divisor is a constant and not zero, divide the numbers
+ if( t1->base() == Type::FloatCon &&
+ t2->base() == Type::FloatCon &&
+ t2->getf() != 0.0 ) // could be negative zero
+ return TypeF::make( t1->getf()/t2->getf() );
+
+ // If the dividend is a constant zero
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Test TypeF::ZERO is not sufficient as it could be negative zero
+
+ if( t1 == TypeF::ZERO && !g_isnan(t2->getf()) && t2->getf() != 0.0 )
+ return TypeF::ZERO;
+
+ // Otherwise we give up all hope
+ return Type::FLOAT;
+}
+
+//------------------------------Identity---------------------------------------
+// Dividing by self is 1.
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivFNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) ) == TypeF::ONE) ? in(1) : this;
+}
+
+
+//------------------------------Idealize---------------------------------------
+Node *DivFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == TypeF::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeF *tf = t2->isa_float_constant();
+ if( !tf ) return NULL;
+ if( tf->base() != Type::FloatCon ) return NULL;
+
+ // Check for out of range values
+ if( tf->is_nan() || !tf->is_finite() ) return NULL;
+
+ // Get the value
+ float f = tf->getf();
+ int exp;
+
+ // Only for special case of dividing by a power of 2
+ if( frexp((double)f, &exp) != 0.5 ) return NULL;
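+ // Illustrative (not in the original source): f = 8.0f gives frexp(8.0) == 0.5
+ // with exp == 4, so the divide is rewritten below as a multiply by the
+ // exactly representable reciprocal 0.125f.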
+
+ // Limit the range of acceptable exponents
+ if( exp < -126 || exp > 126 ) return NULL;
+
+ // Compute the reciprocal
+ float reciprocal = ((float)1.0) / f;
+
+ assert( frexp((double)reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );
+
+ // return multiplication by the reciprocal
+ return (new (phase->C, 3) MulFNode(in(1), phase->makecon(TypeF::make(reciprocal))));
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A DivDNode divides its inputs. The third input is a Control input, used to
+// prevent hoisting the divide above an unsafe test.
+const Type *DivDNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // x/x == 1, we ignore 0/0.
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::DoubleCon)
+ if (!g_isnan(t1->getd()) && g_isfinite(t1->getd()) && t1->getd() != 0.0) // could be negative ZERO or NaN
+ return TypeD::ONE;
+
+ if( t2 == TypeD::ONE )
+ return t1;
+
+ // If the divisor is a constant and not zero, divide the numbers
+ if( t1->base() == Type::DoubleCon &&
+ t2->base() == Type::DoubleCon &&
+ t2->getd() != 0.0 ) // could be negative zero
+ return TypeD::make( t1->getd()/t2->getd() );
+
+ // If the dividend is a constant zero
+ // Note: if t1 and t2 are zero then result is NaN (JVMS page 213)
+ // Test TypeD::ZERO is not sufficient as it could be negative zero
+ if( t1 == TypeD::ZERO && !g_isnan(t2->getd()) && t2->getd() != 0.0 )
+ return TypeD::ZERO;
+
+ // Otherwise we give up all hope
+ return Type::DOUBLE;
+}
+
+
+//------------------------------Identity---------------------------------------
+// Dividing by self is 1.
+// If the divisor is 1, we are an identity on the dividend.
+Node *DivDNode::Identity( PhaseTransform *phase ) {
+ return (phase->type( in(2) ) == TypeD::ONE) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+Node *DivDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (in(0) && remove_dead_region(phase, can_reshape)) return this;
+
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == TypeD::ONE ) // Identity?
+ return NULL; // Skip it
+
+ const TypeD *td = t2->isa_double_constant();
+ if( !td ) return NULL;
+ if( td->base() != Type::DoubleCon ) return NULL;
+
+ // Check for out of range values
+ if( td->is_nan() || !td->is_finite() ) return NULL;
+
+ // Get the value
+ double d = td->getd();
+ int exp;
+
+ // Only for special case of dividing by a power of 2
+ if( frexp(d, &exp) != 0.5 ) return NULL;
+
+ // Limit the range of acceptable exponents
+ if( exp < -1021 || exp > 1022 ) return NULL;
+
+ // Compute the reciprocal
+ double reciprocal = 1.0 / d;
+
+ assert( frexp(reciprocal, &exp) == 0.5, "reciprocal should be power of 2" );
+
+ // return multiplication by the reciprocal
+ return (new (phase->C, 3) MulDNode(in(1), phase->makecon(TypeD::make(reciprocal))));
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *ModINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Check for dead control input
+ if( remove_dead_region(phase, can_reshape) ) return this;
+
+ // Get the modulus
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL;
+ const TypeInt *ti = t->is_int();
+
+ // Check for useless control input
+ // Check for excluding mod-zero case
+ if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) {
+ set_req(0, NULL); // Yank control input
+ return this;
+ }
+
+ // See if we are MOD'ing by 2^k or 2^k-1.
+ if( !ti->is_con() ) return NULL;
+ jint con = ti->get_con();
+
+ Node *hook = new (phase->C, 1) Node(1);
+
+ // First, special check for modulo 2^k-1
+ if( con >= 0 && con < max_jint && is_power_of_2(con+1) ) {
+ uint k = exact_log2(con+1); // Extract k
+
+ // Basic algorithm by David Detlefs. See fastmod_int.java for gory details.
+ static int unroll_factor[] = { 999, 999, 29, 14, 9, 7, 5, 4, 4, 3, 3, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/};
+ int trip_count = 1;
+ if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k];
+
+ // If the unroll factor is not too large, and if conditional moves are
+ // ok, then use this case
+ if( trip_count <= 5 && ConditionalMoveLimit != 0 ) {
+ Node *x = in(1); // Value being mod'd
+ Node *divisor = in(2); // Also is mask
+
+ hook->init_req(0, x); // Add a use to x to prevent him from dying
+ // Generate code to reduce X rapidly to nearly 2^k-1.
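+ // The reduction relies on 2^k == 1 (mod 2^k-1), so replacing x with
+ // (x >> k) + (x & (2^k-1)) preserves x mod (2^k-1) while shrinking x.
+ // Illustrative (not in the original source): for divisor 63 (k = 6) and
+ // x = 1000, one pass gives (1000 >> 6) + (1000 & 63) = 15 + 40 = 55,
+ // which already equals 1000 % 63.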
+ for( int i = 0; i < trip_count; i++ ) {
+ Node *xl = phase->transform( new (phase->C, 3) AndINode(x,divisor) );
+ Node *xh = phase->transform( new (phase->C, 3) RShiftINode(x,phase->intcon(k)) ); // Must be signed
+ x = phase->transform( new (phase->C, 3) AddINode(xh,xl) );
+ hook->set_req(0, x);
+ }
+
+ // Generate sign-fixup code. Was original value positive?
+ // int hack_res = (i >= 0) ? divisor : 1;
+ Node *cmp1 = phase->transform( new (phase->C, 3) CmpINode( in(1), phase->intcon(0) ) );
+ Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) );
+ Node *cmov1= phase->transform( new (phase->C, 4) CMoveINode(bol1, phase->intcon(1), divisor, TypeInt::POS) );
+ // if( x >= hack_res ) x -= divisor;
+ Node *sub = phase->transform( new (phase->C, 3) SubINode( x, divisor ) );
+ Node *cmp2 = phase->transform( new (phase->C, 3) CmpINode( x, cmov1 ) );
+ Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) );
+ // Convention is to not transform the return value of an Ideal
+ // since Ideal is expected to return a modified 'this' or a new node.
+ Node *cmov2= new (phase->C, 4) CMoveINode(bol2, x, sub, TypeInt::INT);
+ // cmov2 is now the mod
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+ return cmov2;
+ }
+ }
+
+ // Fell through: the unroll case is not appropriate. Transform the modulo
+ // into a long-multiply / int-multiply / subtract sequence
+
+ // Cannot handle mod 0, and min_jint isn't handled by the transform
+ if( con == 0 || con == min_jint ) return NULL;
+
+ // Get the absolute value of the constant; at this point, we can use this
+ jint pos_con = (con >= 0) ? con : -con;
+
+ // integer Mod 1 is always 0
+ if( pos_con == 1 ) return new (phase->C, 1) ConINode(TypeInt::ZERO);
+
+ int log2_con = -1;
+
+ // If this is a power of two, then maybe we can mask it
+ if( is_power_of_2(pos_con) ) {
+ log2_con = log2_intptr((intptr_t)pos_con);
+
+ const Type *dt = phase->type(in(1));
+ const TypeInt *dti = dt->isa_int();
+
+ // See if this can be masked, if the dividend is non-negative
+ if( dti && dti->_lo >= 0 )
+ return ( new (phase->C, 3) AndINode( in(1), phase->intcon( pos_con-1 ) ) );
+ }
+
+ // Save in(1) so that it cannot be changed or deleted
+ hook->init_req(0, in(1));
+
+ // Divide using the transform from DivI to MulL
+ Node *divide = phase->transform( transform_int_divide_to_long_multiply( phase, in(1), pos_con ) );
+
+ // Re-multiply, using a shift if this is a power of two
+ Node *mult = NULL;
+
+ if( log2_con >= 0 )
+ mult = phase->transform( new (phase->C, 3) LShiftINode( divide, phase->intcon( log2_con ) ) );
+ else
+ mult = phase->transform( new (phase->C, 3) MulINode( divide, phase->intcon( pos_con ) ) );
+
+ // Finally, subtract the re-multiplied quotient from the original dividend
+ Node *result = new (phase->C, 3) SubINode( in(1), mult );
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+
+ // return the value
+ return result;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ModINode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // We always generate the dynamic check for 0.
+ // 0 MOD X is 0
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // X MOD X is 0
+ if( phase->eqv( in(1), in(2) ) ) return TypeInt::ZERO;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ const TypeInt *i1 = t1->is_int();
+ const TypeInt *i2 = t2->is_int();
+ if( !i1->is_con() || !i2->is_con() ) {
+ if( i1->_lo >= 0 && i2->_lo >= 0 )
+ return TypeInt::POS;
+ // If both numbers are not constants, we know little.
+ return TypeInt::INT;
+ }
+ // Mod by zero? Throw exception at runtime!
+ if( !i2->get_con() ) return TypeInt::POS;
+
+ // We must be modulo'ing 2 int constants.
+ // Check for min_jint % '-1', result is defined to be '0'.
+ if( i1->get_con() == min_jint && i2->get_con() == -1 )
+ return TypeInt::ZERO;
+
+ return TypeInt::make( i1->get_con() % i2->get_con() );
+}
+
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+Node *ModLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Check for dead control input
+ if( remove_dead_region(phase, can_reshape) ) return this;
+
+ // Get the modulus
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL;
+ const TypeLong *ti = t->is_long();
+
+ // Check for useless control input
+ // Check for excluding mod-zero case
+ if( in(0) && (ti->_hi < 0 || ti->_lo > 0) ) {
+ set_req(0, NULL); // Yank control input
+ return this;
+ }
+
+ // See if we are MOD'ing by 2^k or 2^k-1.
+ if( !ti->is_con() ) return NULL;
+ jlong con = ti->get_con();
+ bool m1 = false;
+ if( !is_power_of_2_long(con) ) { // Not 2^k
+ if( !is_power_of_2_long(con+1) ) // Not 2^k-1?
+ return NULL; // No interesting mod hacks
+ m1 = true; // Found 2^k-1
+ con++; // Convert to 2^k form
+ }
+ uint k = log2_long(con); // Extract k
+
+ // Expand mod
+ if( !m1 ) { // Case 2^k
+ } else { // Case 2^k-1
+ // Basic algorithm by David Detlefs. See fastmod_long.java for gory details.
+ // Used to help a popular random number generator which does a long-mod
+ // of 2^31-1 and shows up in SpecJBB and SciMark.
+ static int unroll_factor[] = { 999, 999, 61, 30, 20, 15, 12, 10, 8, 7, 6, 6, 5, 5, 4, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 /*past here we assume 1 forever*/};
+ int trip_count = 1;
+ if( k < ARRAY_SIZE(unroll_factor)) trip_count = unroll_factor[k];
+ if( trip_count > 4 ) return NULL; // Too much unrolling
+ if (ConditionalMoveLimit == 0) return NULL; // cmov is required
+
+ Node *x = in(1); // Value being mod'd
+ Node *divisor = in(2); // Also is mask
+
+ Node *hook = new (phase->C, 1) Node(x);
+ // Generate code to reduce X rapidly to nearly 2^k-1.
+ for( int i = 0; i < trip_count; i++ ) {
+ Node *xl = phase->transform( new (phase->C, 3) AndLNode(x,divisor) );
+ Node *xh = phase->transform( new (phase->C, 3) RShiftLNode(x,phase->intcon(k)) ); // Must be signed
+ x = phase->transform( new (phase->C, 3) AddLNode(xh,xl) );
+ hook->set_req(0, x); // Add a use to x to prevent him from dying
+ }
+ // Generate sign-fixup code. Was original value positive?
+ // long hack_res = (i >= 0) ? divisor : CONST64(1);
+ Node *cmp1 = phase->transform( new (phase->C, 3) CmpLNode( in(1), phase->longcon(0) ) );
+ Node *bol1 = phase->transform( new (phase->C, 2) BoolNode( cmp1, BoolTest::ge ) );
+ Node *cmov1= phase->transform( new (phase->C, 4) CMoveLNode(bol1, phase->longcon(1), divisor, TypeLong::LONG) );
+ // if( x >= hack_res ) x -= divisor;
+ Node *sub = phase->transform( new (phase->C, 3) SubLNode( x, divisor ) );
+ Node *cmp2 = phase->transform( new (phase->C, 3) CmpLNode( x, cmov1 ) );
+ Node *bol2 = phase->transform( new (phase->C, 2) BoolNode( cmp2, BoolTest::ge ) );
+ // Convention is to not transform the return value of an Ideal
+ // since Ideal is expected to return a modified 'this' or a new node.
+ Node *cmov2= new (phase->C, 4) CMoveLNode(bol2, x, sub, TypeLong::LONG);
+ // cmov2 is now the mod
+
+ // Now remove the bogus extra edges used to keep things alive
+ if (can_reshape) {
+ phase->is_IterGVN()->remove_dead_node(hook);
+ } else {
+ hook->set_req(0, NULL); // Just yank bogus edge during Parse phase
+ }
+ return cmov2;
+ }
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *ModLNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // We always generate the dynamic check for 0.
+ // 0 MOD X is 0
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // X MOD X is 0
+ if( phase->eqv( in(1), in(2) ) ) return TypeLong::ZERO;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ const TypeLong *i1 = t1->is_long();
+ const TypeLong *i2 = t2->is_long();
+ if( !i1->is_con() || !i2->is_con() ) {
+ if( i1->_lo >= CONST64(0) && i2->_lo >= CONST64(0) )
+ return TypeLong::POS;
+ // If both numbers are not constants, we know little.
+ return TypeLong::LONG;
+ }
+ // Mod by zero? Throw exception at runtime!
+ if( !i2->get_con() ) return TypeLong::POS;
+
+ // We must be modulo'ing 2 long constants.
+ // Check for min_jlong % '-1', result is defined to be '0'.
+ if( i1->get_con() == min_jlong && i2->get_con() == -1 )
+ return TypeLong::ZERO;
+
+ return TypeLong::make( i1->get_con() % i2->get_con() );
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ModFNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // If either is a NaN, return an input NaN
+ if( g_isnan(t1->getf()) ) return t1;
+ if( g_isnan(t2->getf()) ) return t2;
+
+ // It is not worth trying to constant fold this stuff!
+ return Type::FLOAT;
+
+ /*
+ // If dividend is infinity or divisor is zero, or both, the result is NaN
+ if( !g_isfinite(t1->getf()) || ((t2->getf() == 0.0) || (jint_cast(t2->getf()) == 0x80000000)) )
+
+ // X MOD infinity = X
+ if( !g_isfinite(t2->getf()) && !g_isnan(t2->getf()) ) return t1;
+ // 0 MOD finite = dividend (positive or negative zero)
+ // Not valid for: NaN MOD any; any MOD nan; 0 MOD 0; or for 0 MOD NaN
+ // NaNs are handled previously.
+ if( !(t2->getf() == 0.0) && !((int)t2->getf() == 0x80000000)) {
+ if (((t1->getf() == 0.0) || ((int)t1->getf() == 0x80000000)) && g_isfinite(t2->getf()) ) {
+ return t1;
+ }
+ }
+ // X MOD X is 0
+ // Does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::FloatCon)
+ if (!g_isnan(t1->getf()) && (t1->getf() != 0.0) && ((int)t1->getf() != 0x80000000)) {
+ if(t1->getf() < 0.0) {
+ float result = jfloat_cast(0x80000000);
+ return TypeF::make( result );
+ }
+ else
+ return TypeF::ZERO;
+ }
+
+ // If both numbers are not constants, we know nothing.
+ if( (t1->base() != Type::FloatCon) || (t2->base() != Type::FloatCon) )
+ return Type::FLOAT;
+
+ // We must be modulo'ing 2 float constants.
+ // Make sure that the sign of the fmod is equal to the sign of the dividend
+ float result = (float)fmod( t1->getf(), t2->getf() );
+ float dividend = t1->getf();
+ if( (dividend < 0.0) || ((int)dividend == 0x80000000) ) {
+ if( result > 0.0 )
+ result = 0.0 - result;
+ else if( result == 0.0 ) {
+ result = jfloat_cast(0x80000000);
+ }
+ }
+ return TypeF::make( result );
+ */
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *ModDNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ // If either is a NaN, return an input NaN
+ if( g_isnan(t1->getd()) ) return t1;
+ if( g_isnan(t2->getd()) ) return t2;
+ // X MOD infinity = X
+ if( !g_isfinite(t2->getd())) return t1;
+ // 0 MOD finite = dividend (positive or negative zero)
+ // Not valid for: NaN MOD any; any MOD nan; 0 MOD 0; or for 0 MOD NaN
+ // NaNs are handled previously.
+ if( !(t2->getd() == 0.0) ) {
+ if( t1->getd() == 0.0 && g_isfinite(t2->getd()) ) {
+ return t1;
+ }
+ }
+
+ // X MOD X is 0
+ // does not work for variables because of NaN's
+ if( phase->eqv( in(1), in(2) ) && t1->base() == Type::DoubleCon )
+ if (!g_isnan(t1->getd()) && t1->getd() != 0.0)
+ return TypeD::ZERO;
+
+
+ // If both numbers are not constants, we know nothing.
+ if( (t1->base() != Type::DoubleCon) || (t2->base() != Type::DoubleCon) )
+ return Type::DOUBLE;
+
+ // We must be modulo'ing 2 double constants.
+ return TypeD::make( fmod( t1->getd(), t2->getd() ) );
+}
+
+//=============================================================================
+
+DivModNode::DivModNode( Node *c, Node *dividend, Node *divisor ) : MultiNode(3) {
+ init_req(0, c);
+ init_req(1, dividend);
+ init_req(2, divisor);
+}
+
+//------------------------------make------------------------------------------
+DivModINode* DivModINode::make(Compile* C, Node* div_or_mod) {
+ Node* n = div_or_mod;
+ assert(n->Opcode() == Op_DivI || n->Opcode() == Op_ModI,
+ "only div or mod input pattern accepted");
+
+ DivModINode* divmod = new (C, 3) DivModINode(n->in(0), n->in(1), n->in(2));
+ Node* dproj = new (C, 1) ProjNode(divmod, DivModNode::div_proj_num);
+ Node* mproj = new (C, 1) ProjNode(divmod, DivModNode::mod_proj_num);
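+ // Added commentary (not in the original): dproj and mproj are not returned;
+ // constructing the two ProjNodes registers them as outputs of divmod, and
+ // callers retrieve them later through div_proj()/mod_proj().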
+ return divmod;
+}
+
+//------------------------------make------------------------------------------
+DivModLNode* DivModLNode::make(Compile* C, Node* div_or_mod) {
+ Node* n = div_or_mod;
+ assert(n->Opcode() == Op_DivL || n->Opcode() == Op_ModL,
+ "only div or mod input pattern accepted");
+
+ DivModLNode* divmod = new (C, 3) DivModLNode(n->in(0), n->in(1), n->in(2));
+ Node* dproj = new (C, 1) ProjNode(divmod, DivModNode::div_proj_num);
+ Node* mproj = new (C, 1) ProjNode(divmod, DivModNode::mod_proj_num);
+ return divmod;
+}
+
+//------------------------------match------------------------------------------
+// return result(s) along with their RegMask info
+Node *DivModINode::match( const ProjNode *proj, const Matcher *match ) {
+ uint ideal_reg = proj->ideal_reg();
+ RegMask rm;
+ if (proj->_con == div_proj_num) {
+ rm = match->divI_proj_mask();
+ } else {
+ assert(proj->_con == mod_proj_num, "must be div or mod projection");
+ rm = match->modI_proj_mask();
+ }
+ return new (match->C, 1)MachProjNode(this, proj->_con, rm, ideal_reg);
+}
+
+
+//------------------------------match------------------------------------------
+// return result(s) along with their RegMask info
+Node *DivModLNode::match( const ProjNode *proj, const Matcher *match ) {
+ uint ideal_reg = proj->ideal_reg();
+ RegMask rm;
+ if (proj->_con == div_proj_num) {
+ rm = match->divL_proj_mask();
+ } else {
+ assert(proj->_con == mod_proj_num, "must be div or mod projection");
+ rm = match->modL_proj_mask();
+ }
+ return new (match->C, 1)MachProjNode(this, proj->_con, rm, ideal_reg);
+}
diff --git a/src/share/vm/opto/divnode.hpp b/src/share/vm/opto/divnode.hpp
new file mode 100644
index 000000000..797d2cf41
--- /dev/null
+++ b/src/share/vm/opto/divnode.hpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+//------------------------------DivINode---------------------------------------
+// Integer division
+// Note: this is division as defined by JVMS, i.e., MinInt/-1 == MinInt.
+// On processors which don't naturally support this special case (e.g., x86),
+// the matcher or runtime system must take care of this.
+class DivINode : public Node {
+public:
+ DivINode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor ) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------DivLNode---------------------------------------
+// Long division
+class DivLNode : public Node {
+public:
+ DivLNode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor ) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------DivFNode---------------------------------------
+// Float division
+class DivFNode : public Node {
+public:
+ DivFNode( Node *c, Node *dividend, Node *divisor ) : Node(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------DivDNode---------------------------------------
+// Double division
+class DivDNode : public Node {
+public:
+ DivDNode( Node *c, Node *dividend, Node *divisor ) : Node(c,dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------ModINode---------------------------------------
+// Integer modulus
+class ModINode : public Node {
+public:
+ ModINode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ModLNode---------------------------------------
+// Long modulus
+class ModLNode : public Node {
+public:
+ ModLNode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ModFNode---------------------------------------
+// Float Modulus
+class ModFNode : public Node {
+public:
+ ModFNode( Node *c, Node *in1, Node *in2 ) : Node(c,in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ModDNode---------------------------------------
+// Double Modulus
+class ModDNode : public Node {
+public:
+ ModDNode( Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------DivModNode---------------------------------------
+// Division with remainder result.
+class DivModNode : public MultiNode {
+protected:
+ DivModNode( Node *c, Node *dividend, Node *divisor );
+public:
+ enum {
+ div_proj_num = 0, // quotient
+ mod_proj_num = 1 // remainder
+ };
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape) { return NULL; }
+ virtual const Type *Value( PhaseTransform *phase ) const { return bottom_type(); }
+ virtual uint hash() const { return Node::hash(); }
+ virtual bool is_CFG() const { return false; }
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+
+ ProjNode* div_proj() { return proj_out(div_proj_num); }
+ ProjNode* mod_proj() { return proj_out(mod_proj_num); }
+};
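+// Added commentary (not in the original header): on targets whose hardware
+// divide produces both quotient and remainder, the compiler can replace a
+// DivI/ModI (or DivL/ModL) pair sharing the same inputs with a single DivMod
+// node and read both results through the div_proj()/mod_proj() projections.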
+
+//------------------------------DivModINode---------------------------------------
+// Integer division with remainder result.
+class DivModINode : public DivModNode {
+public:
+ DivModINode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeTuple::INT_PAIR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+
+ // Make a divmod and associated projections from a div or mod.
+ static DivModINode* make(Compile* C, Node* div_or_mod);
+};
+
+//------------------------------DivModLNode---------------------------------------
+// Long division with remainder result.
+class DivModLNode : public DivModNode {
+public:
+ DivModLNode( Node *c, Node *dividend, Node *divisor ) : DivModNode(c, dividend, divisor) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeTuple::LONG_PAIR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+
+ // Make a divmod and associated projections from a div or mod.
+ static DivModLNode* make(Compile* C, Node* div_or_mod);
+};
diff --git a/src/share/vm/opto/doCall.cpp b/src/share/vm/opto/doCall.cpp
new file mode 100644
index 000000000..ff85fb643
--- /dev/null
+++ b/src/share/vm/opto/doCall.cpp
@@ -0,0 +1,862 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_doCall.cpp.incl"
+
+#ifndef PRODUCT
+void trace_type_profile(ciMethod *method, int depth, int bci, ciMethod *prof_method, ciKlass *prof_klass, int site_count, int receiver_count) {
+ if (TraceTypeProfile || PrintInlining || PrintOptoInlining) {
+ tty->print(" ");
+ for( int i = 0; i < depth; i++ ) tty->print(" ");
+ if (!PrintOpto) {
+ method->print_short_name();
+ tty->print(" ->");
+ }
+ tty->print(" @ %d ", bci);
+ prof_method->print_short_name();
+ tty->print(" >>TypeProfile (%d/%d counts) = ", receiver_count, site_count);
+ prof_klass->name()->print_symbol();
+ tty->print_cr(" (%d bytes)", prof_method->code_size());
+ }
+}
+#endif
+
+CallGenerator* Compile::call_generator(ciMethod* call_method, int vtable_index, bool call_is_virtual, JVMState* jvms, bool allow_inline, float prof_factor) {
+ CallGenerator* cg;
+
+ // Dtrace currently doesn't work unless all calls are vanilla
+ if (DTraceMethodProbes) {
+ allow_inline = false;
+ }
+
+ // Note: When we get profiling during stage-1 compiles, we want to pull
+ // from more specific profile data which pertains to this inlining.
+ // Right now, ignore the information in jvms->caller(), and do method[bci].
+ ciCallProfile profile = jvms->method()->call_profile_at_bci(jvms->bci());
+
+ // See how many times this site has been invoked.
+ int site_count = profile.count();
+ int receiver_count = -1;
+ if (call_is_virtual && UseTypeProfile && profile.has_receiver(0)) {
+ // Receivers in the profile structure are ordered by call counts
+ // so that the most called (major) receiver is profile.receiver(0).
+ receiver_count = profile.receiver_count(0);
+ }
+
+ CompileLog* log = this->log();
+ if (log != NULL) {
+ int rid = (receiver_count >= 0)? log->identify(profile.receiver(0)): -1;
+ int r2id = (profile.morphism() == 2)? log->identify(profile.receiver(1)):-1;
+ log->begin_elem("call method='%d' count='%d' prof_factor='%g'",
+ log->identify(call_method), site_count, prof_factor);
+ if (call_is_virtual) log->print(" virtual='1'");
+ if (allow_inline) log->print(" inline='1'");
+ if (receiver_count >= 0) {
+ log->print(" receiver='%d' receiver_count='%d'", rid, receiver_count);
+ if (profile.has_receiver(1)) {
+ log->print(" receiver2='%d' receiver2_count='%d'", r2id, profile.receiver_count(1));
+ }
+ }
+ log->end_elem();
+ }
+
+ // Special case the handling of certain common, profitable library
+ // methods. If these methods are replaced with specialized code,
+ // then we return it as the inlined version of the call.
+ // We do this before the strict f.p. check below because the
+ // intrinsics handle strict f.p. correctly.
+ if (allow_inline) {
+ cg = find_intrinsic(call_method, call_is_virtual);
+ if (cg != NULL) return cg;
+ }
+
+ // Do not inline strict fp into non-strict code, or the reverse
+ bool caller_method_is_strict = jvms->method()->is_strict();
+ if( caller_method_is_strict ^ call_method->is_strict() ) {
+ allow_inline = false;
+ }
+
+ // Attempt to inline...
+ if (allow_inline) {
+ // The profile data is only partly attributable to this caller,
+ // scale back the call site information.
+ float past_uses = jvms->method()->scale_count(site_count, prof_factor);
+ // This is the number of times we expect the call code to be used.
+ float expected_uses = past_uses;
+
+ // Try inlining a bytecoded method:
+ if (!call_is_virtual) {
+ InlineTree* ilt;
+ if (UseOldInlining) {
+ ilt = InlineTree::find_subtree_from_root(this->ilt(), jvms->caller(), jvms->method());
+ } else {
+ // Make a disembodied, stateless ILT.
+ // TO DO: When UseOldInlining is removed, copy the ILT code elsewhere.
+ float site_invoke_ratio = prof_factor;
+ // Note: ilt is for the root of this parse, not the present call site.
+ ilt = new InlineTree(this, jvms->method(), jvms->caller(), site_invoke_ratio);
+ }
+ WarmCallInfo scratch_ci;
+ if (!UseOldInlining)
+ scratch_ci.init(jvms, call_method, profile, prof_factor);
+ WarmCallInfo* ci = ilt->ok_to_inline(call_method, jvms, profile, &scratch_ci);
+ assert(ci != &scratch_ci, "do not let this pointer escape");
+ bool allow_inline = (ci != NULL && !ci->is_cold());
+ bool require_inline = (allow_inline && ci->is_hot());
+
+ if (allow_inline) {
+ CallGenerator* cg = CallGenerator::for_inline(call_method, expected_uses);
+ if (cg == NULL) {
+ // Fall through.
+ } else if (require_inline || !InlineWarmCalls) {
+ return cg;
+ } else {
+ CallGenerator* cold_cg = call_generator(call_method, vtable_index, call_is_virtual, jvms, false, prof_factor);
+ return CallGenerator::for_warm_call(ci, cold_cg, cg);
+ }
+ }
+ }
+
+ // Try using the type profile.
+ if (call_is_virtual && site_count > 0 && receiver_count > 0) {
+ // The major receiver's count >= TypeProfileMajorReceiverPercent of site_count.
+ bool have_major_receiver = (100.*profile.receiver_prob(0) >= (float)TypeProfileMajorReceiverPercent);
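+ // Illustration (threshold value hypothetical): with
+ // TypeProfileMajorReceiverPercent at 90, a site whose profile shows 95% of
+ // calls going to one receiver class sets have_major_receiver, while an
+ // 80/20 bimorphic site does not.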
+ ciMethod* receiver_method = NULL;
+ if (have_major_receiver || profile.morphism() == 1 ||
+ (profile.morphism() == 2 && UseBimorphicInlining)) {
+ // receiver_method = profile.method();
+ // Profiles do not suggest methods now. Look it up in the major receiver.
+ receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+ profile.receiver(0));
+ }
+ if (receiver_method != NULL) {
+ // The single majority receiver sufficiently outweighs the minority.
+ CallGenerator* hit_cg = this->call_generator(receiver_method,
+ vtable_index, !call_is_virtual, jvms, allow_inline, prof_factor);
+ if (hit_cg != NULL) {
+ // Look up second receiver.
+ CallGenerator* next_hit_cg = NULL;
+ ciMethod* next_receiver_method = NULL;
+ if (profile.morphism() == 2 && UseBimorphicInlining) {
+ next_receiver_method = call_method->resolve_invoke(jvms->method()->holder(),
+ profile.receiver(1));
+ if (next_receiver_method != NULL) {
+ next_hit_cg = this->call_generator(next_receiver_method,
+ vtable_index, !call_is_virtual, jvms,
+ allow_inline, prof_factor);
+ if (next_hit_cg != NULL && !next_hit_cg->is_inline() &&
+ have_major_receiver && UseOnlyInlinedBimorphic) {
+ // Skip if we can't inline second receiver's method
+ next_hit_cg = NULL;
+ }
+ }
+ }
+ CallGenerator* miss_cg;
+ if (( profile.morphism() == 1 ||
+ (profile.morphism() == 2 && next_hit_cg != NULL) ) &&
+ !too_many_traps(Deoptimization::Reason_class_check)
+ // Check only the total number of traps per method, to allow
+ // the transition from the monomorphic to the bimorphic case between
+ // compilations without falling back to a virtual call.
+ // A monomorphic case may have the class_check trap flag set
+ // due to the time gap between the uncommon trap processing
+ // (when flags are set in the MDO) and the call site bytecode execution
+ // in the interpreter (when MDO counters are updated).
+ // There was also a class_check trap in the monomorphic case due to
+ // bug 6225440.
+
+ ) {
+ // Generate uncommon trap for class check failure path
+ // in case of monomorphic or bimorphic virtual call site.
+ miss_cg = CallGenerator::for_uncommon_trap(call_method,
+ Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ } else {
+ // Generate virtual call for class check failure path
+ // in case of polymorphic virtual call site.
+ miss_cg = CallGenerator::for_virtual_call(call_method, vtable_index);
+ }
+ if (miss_cg != NULL) {
+ if (next_hit_cg != NULL) {
+ NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth(), jvms->bci(), next_receiver_method, profile.receiver(1), site_count, profile.receiver_count(1)));
+ // We don't need to record dependency on a receiver here and below.
+ // Whenever we inline, the dependency is added by Parse::Parse().
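+ // Nest the predicted calls: this inner test checks receiver(1) and falls
+ // back to miss_cg (trap or virtual call); the outer test built below checks
+ // receiver(0) first and drops into this one on a miss.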
+ miss_cg = CallGenerator::for_predicted_call(profile.receiver(1), miss_cg, next_hit_cg, PROB_MAX);
+ }
+ if (miss_cg != NULL) {
+ NOT_PRODUCT(trace_type_profile(jvms->method(), jvms->depth(), jvms->bci(), receiver_method, profile.receiver(0), site_count, receiver_count));
+ cg = CallGenerator::for_predicted_call(profile.receiver(0), miss_cg, hit_cg, profile.receiver_prob(0));
+ if (cg != NULL) return cg;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // There was no special inlining tactic, or it bailed out.
+ // Use a more generic tactic, like a simple call.
+ if (call_is_virtual) {
+ return CallGenerator::for_virtual_call(call_method, vtable_index);
+ } else {
+ // Class Hierarchy Analysis or Type Profile reveals a unique target,
+ // or it is a static or special call.
+ return CallGenerator::for_direct_call(call_method);
+ }
+}
+
+
+// uncommon-trap call-sites where callee is unloaded, uninitialized or will not link
+bool Parse::can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass* klass) {
+ // Additional inputs to consider...
+ // bc = bc()
+ // caller = method()
+ // iter().get_method_holder_index()
+ assert( dest_method->is_loaded(), "ciTypeFlow should not let us get here" );
+ // Interface classes can be loaded & linked and never get around to
+ // being initialized. Uncommon-trap for not-initialized static or
+ // v-calls. Let interface calls happen.
+ ciInstanceKlass* holder_klass = dest_method->holder();
+ if (!holder_klass->is_initialized() &&
+ !holder_klass->is_interface()) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ holder_klass);
+ return true;
+ }
+
+ assert(dest_method->will_link(method()->holder(), klass, bc()), "dest_method: typeflow responsibility");
+ return false;
+}
+
+
+//------------------------------do_call----------------------------------------
+// Handle your basic call. Inline if we can & want to, else just setup call.
+void Parse::do_call() {
+ // It's likely we are going to add debug info soon.
+ // Also, if we inline a guy who eventually needs debug info for this JVMS,
+ // our contribution to it is cleaned up right here.
+ kill_dead_locals();
+
+ // Set frequently used booleans
+ bool is_virtual = bc() == Bytecodes::_invokevirtual;
+ bool is_virtual_or_interface = is_virtual || bc() == Bytecodes::_invokeinterface;
+ bool has_receiver = is_virtual_or_interface || bc() == Bytecodes::_invokespecial;
+
+ // Find target being called
+ bool will_link;
+ ciMethod* dest_method = iter().get_method(will_link);
+ ciInstanceKlass* holder_klass = dest_method->holder();
+ ciKlass* holder = iter().get_declared_method_holder();
+ ciInstanceKlass* klass = ciEnv::get_instance_klass_for_declared_method_holder(holder);
+
+ int nargs = dest_method->arg_size();
+
+ // uncommon-trap when callee is unloaded, uninitialized or will not link
+ // bailout when too many arguments for register representation
+ if (!will_link || can_not_compile_call_site(dest_method, klass)) {
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" can not compile call at bci %d to:", bci());
+ dest_method->print_name(); tty->cr();
+ }
+#endif
+ return;
+ }
+ assert(holder_klass->is_loaded(), "");
+ assert(dest_method->is_static() == !has_receiver, "must match bc");
+ // Note: this takes into account invokeinterface of methods declared in java/lang/Object,
+ // which should be invokevirtuals but according to the VM spec may be invokeinterfaces
+ assert(holder_klass->is_interface() || holder_klass->super() == NULL || (bc() != Bytecodes::_invokeinterface), "must match bc");
+ // Note: In the absence of miranda methods, an abstract class K can perform
+ // an invokevirtual directly on an interface method I.m if K implements I.
+
+ // ---------------------
+ // Does Class Hierarchy Analysis reveal only a single target of a v-call?
+ // Then we may inline or make a static call, but become dependent on there being only 1 target.
+ // Does the call-site type profile reveal only one receiver?
+ // Then we may introduce a run-time check and inline on the path where it succeeds.
+ // The other path may uncommon_trap, check for another receiver, or do a v-call.
+
+ // Choose call strategy.
+ bool call_is_virtual = is_virtual_or_interface;
+ int vtable_index = methodOopDesc::invalid_vtable_index;
+ ciMethod* call_method = dest_method;
+
+ // Try to get the most accurate receiver type
+ if (is_virtual_or_interface) {
+ Node* receiver_node = stack(sp() - nargs);
+ const TypeOopPtr* receiver_type = _gvn.type(receiver_node)->isa_oopptr();
+ ciMethod* optimized_virtual_method = optimize_inlining(method(), bci(), klass, dest_method, receiver_type);
+
+ // Has the call been sufficiently improved that it is no longer virtual?
+ if (optimized_virtual_method != NULL) {
+ call_method = optimized_virtual_method;
+ call_is_virtual = false;
+ } else if (!UseInlineCaches && is_virtual && call_method->is_loaded()) {
+ // We can make a vtable call at this site
+ vtable_index = call_method->resolve_vtable_index(method()->holder(), klass);
+ }
+ }
+
+ // Note: It's OK to try to inline a virtual call.
+ // The call generator will not attempt to inline a polymorphic call
+ // unless it knows how to optimize the receiver dispatch.
+ bool try_inline = (C->do_inlining() || InlineAccessors);
+
+ // ---------------------
+ inc_sp(- nargs); // Temporarily pop args for JVM state of call
+ JVMState* jvms = sync_jvms();
+
+ // ---------------------
+ // Decide call tactic.
+ // This call checks with CHA, the interpreter profile, intrinsics table, etc.
+ // It decides whether inlining is desirable or not.
+ CallGenerator* cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+
+ // ---------------------
+ // Round double arguments before call
+ round_double_arguments(dest_method);
+
+#ifndef PRODUCT
+ // bump global counters for calls
+ count_compiled_calls(false/*at_method_entry*/, cg->is_inline());
+
+ // Record first part of parsing work for this call
+ parse_histogram()->record_change();
+#endif // not PRODUCT
+
+ assert(jvms == this->jvms(), "still operating on the right JVMS");
+ assert(jvms_in_sync(), "jvms must carry full info into CG");
+
+ // save across call, for a subsequent cast_not_null.
+ Node* receiver = has_receiver ? argument(0) : NULL;
+
+ // Bump method data counters (We profile *before* the call is made
+ // because exceptions don't return to the call site.)
+ profile_call(receiver);
+
+ JVMState* new_jvms;
+ if ((new_jvms = cg->generate(jvms)) == NULL) {
+ // When inlining attempt fails (e.g., too many arguments),
+ // it may contaminate the current compile state, making it
+ // impossible to pull back and try again. Once we call
+ // cg->generate(), we are committed. If it fails, the whole
+ // compilation task is compromised.
+ if (failing()) return;
+#ifndef PRODUCT
+ if (PrintOpto || PrintOptoInlining || PrintInlining) {
+ // Only one fall-back, so if an intrinsic fails, ignore any bytecodes.
+ if (cg->is_intrinsic() && call_method->code_size() > 0) {
+ tty->print("Bailed out of intrinsic, will not inline: ");
+ call_method->print_name(); tty->cr();
+ }
+ }
+#endif
+ // This can happen if a library intrinsic is available, but refuses
+ // the call site, perhaps because it did not match a pattern the
+ // intrinsic was expecting to optimize. The fallback position is
+ // to call out-of-line.
+ try_inline = false; // Inline tactic bailed out.
+ cg = C->call_generator(call_method, vtable_index, call_is_virtual, jvms, try_inline, prof_factor());
+ if ((new_jvms = cg->generate(jvms)) == NULL) {
+ guarantee(failing(), "call failed to generate: calls should work");
+ return;
+ }
+ }
+
+ if (cg->is_inline()) {
+ C->env()->notice_inlined_method(call_method);
+ }
+
+ // Reset parser state from [new_]jvms, which now carries results of the call.
+ // Return value (if any) is already pushed on the stack by the cg.
+ add_exception_states_from(new_jvms);
+ if (new_jvms->map()->control() == top()) {
+ stop_and_kill_map();
+ } else {
+ assert(new_jvms->same_calls_as(jvms), "method/bci left unchanged");
+ set_jvms(new_jvms);
+ }
+
+ if (!stopped()) {
+ // This was some sort of virtual call, which did a null check for us.
+ // Now we can assert receiver-not-null, on the normal return path.
+ if (receiver != NULL && cg->is_virtual()) {
+ Node* cast = cast_not_null(receiver);
+ // %%% assert(receiver == cast, "should already have cast the receiver");
+ }
+
+ // Round double result after a call from strict to non-strict code
+ round_double_result(dest_method);
+
+ // If the return type of the method is not loaded, assert that the
+ // value we got is a null. Otherwise, we need to recompile.
+ if (!dest_method->return_type()->is_loaded()) {
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" asserting nullness of result at bci: %d", bci());
+ dest_method->print_name(); tty->cr();
+ }
+#endif
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='return' klass='%d'",
+ C->log()->identify(dest_method->return_type()));
+ }
+ // If there is going to be a trap, put it at the next bytecode:
+ set_bci(iter().next_bci());
+ do_null_assert(peek(), T_OBJECT);
+ set_bci(iter().cur_bci()); // put it back
+ }
+ }
+
+ // Restart record of parsing work after possible inlining of call
+#ifndef PRODUCT
+ parse_histogram()->set_initial_state(bc());
+#endif
+}
+
+//---------------------------catch_call_exceptions-----------------------------
+// Put a Catch and CatchProj nodes behind a just-created call.
+// Send their caught exceptions to the proper handler.
+// This may be used after a call to the rethrow VM stub,
+// when it is needed to process unloaded exception classes.
+void Parse::catch_call_exceptions(ciExceptionHandlerStream& handlers) {
+ // Exceptions are delivered through this channel:
+ Node* i_o = this->i_o();
+
+ // Add a CatchNode.
+ GrowableArray<int>* bcis = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, -1);
+ GrowableArray<const Type*>* extypes = new (C->node_arena()) GrowableArray<const Type*>(C->node_arena(), 8, 0, NULL);
+ GrowableArray<int>* saw_unloaded = new (C->node_arena()) GrowableArray<int>(C->node_arena(), 8, 0, 0);
+
+ for (; !handlers.is_done(); handlers.next()) {
+ ciExceptionHandler* h = handlers.handler();
+ int h_bci = h->handler_bci();
+ ciInstanceKlass* h_klass = h->is_catch_all() ? env()->Throwable_klass() : h->catch_klass();
+ // Do not introduce unloaded exception types into the graph:
+ if (!h_klass->is_loaded()) {
+ if (saw_unloaded->contains(h_bci)) {
+ /* We've already seen an unloaded exception with h_bci,
+ so don't duplicate. Duplication will cause the CatchNode to be
+ unnecessarily large. See 4713716. */
+ continue;
+ } else {
+ saw_unloaded->append(h_bci);
+ }
+ }
+ const Type* h_extype = TypeOopPtr::make_from_klass(h_klass);
+ // (We use make_from_klass because it respects UseUniqueSubclasses.)
+ h_extype = h_extype->join(TypeInstPtr::NOTNULL);
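+ // Joining with NOTNULL narrows the handler type to non-null instances;
+ // a thrown exception oop can never be null.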
+ assert(!h_extype->empty(), "sanity");
+ // Note: It's OK if the BCIs repeat themselves.
+ bcis->append(h_bci);
+ extypes->append(h_extype);
+ }
+
+ int len = bcis->length();
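+ // The CatchNode gets len+1 projections: one fall-through for the normal
+ // return plus one per handler bci collected above.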
+ CatchNode *cn = new (C, 2) CatchNode(control(), i_o, len+1);
+ Node *catch_ = _gvn.transform(cn);
+
+ // now branch with the exception state to each of the (potential)
+ // handlers
+ for(int i=0; i < len; i++) {
+ // Setup JVM state to enter the handler.
+ PreserveJVMState pjvms(this);
+ // Locals are just copied from before the call.
+ // Get control from the CatchNode.
+ int handler_bci = bcis->at(i);
+ Node* ctrl = _gvn.transform( new (C, 1) CatchProjNode(catch_, i+1,handler_bci));
+ // This handler cannot happen?
+ if (ctrl == top()) continue;
+ set_control(ctrl);
+
+ // Create exception oop
+ const TypeInstPtr* extype = extypes->at(i)->is_instptr();
+ Node *ex_oop = _gvn.transform(new (C, 2) CreateExNode(extypes->at(i), ctrl, i_o));
+
+ // Handle unloaded exception classes.
+ if (saw_unloaded->contains(handler_bci)) {
+ // An unloaded exception type is coming here. Do an uncommon trap.
+#ifndef PRODUCT
+ // We do not expect the same handler bci to take both cold unloaded
+ // and hot loaded exceptions. But, watch for it.
+ if (extype->is_loaded()) {
+ tty->print("Warning: Handler @%d takes mixed loaded/unloaded exceptions in ", handler_bci);
+ method()->print_name(); tty->cr();
+ } else if (PrintOpto && (Verbose || WizardMode)) {
+ tty->print("Bailing out on unloaded exception type ");
+ extype->klass()->print_name();
+ tty->print(" at bci:%d in ", bci());
+ method()->print_name(); tty->cr();
+ }
+#endif
+ // Emit an uncommon trap instead of processing the block.
+ set_bci(handler_bci);
+ push_ex_oop(ex_oop);
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ extype->klass(), "!loaded exception");
+ set_bci(iter().cur_bci()); // put it back
+ continue;
+ }
+
+ // go to the exception handler
+ if (handler_bci < 0) { // merge with corresponding rethrow node
+ throw_to_exit(make_exception_state(ex_oop));
+ } else { // Else jump to corresponding handler
+ push_ex_oop(ex_oop); // Clear stack and push just the oop.
+ merge_exception(handler_bci);
+ }
+ }
+
+ // The first CatchProj is for the normal return.
+ // (Note: If this is a call to rethrow_Java, this node goes dead.)
+ set_control(_gvn.transform( new (C, 1) CatchProjNode(catch_, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci)));
+}
+
+
+//----------------------------catch_inline_exceptions--------------------------
+// Handle all exceptions thrown by an inlined method or individual bytecode.
+// Common case 1: we have no handler, so all exceptions merge right into
+// the rethrow case.
+// Case 2: we have some handlers, with loaded exception klasses that have
+// no subklasses. We do a Deutsch-Shiffman style type-check on the incoming
+// exception oop and branch to the handler directly.
+// Case 3: We have some handlers with subklasses or are not loaded at
+// compile-time. We have to call the runtime to resolve the exception.
+// So we insert a RethrowCall and all the logic that goes with it.
+void Parse::catch_inline_exceptions(SafePointNode* ex_map) {
+ // Caller is responsible for saving away the map for normal control flow!
+ assert(stopped(), "call set_map(NULL) first");
+ assert(method()->has_exception_handlers(), "don't come here w/o work to do");
+
+ Node* ex_node = saved_ex_oop(ex_map);
+ if (ex_node == top()) {
+ // No action needed.
+ return;
+ }
+ const TypeInstPtr* ex_type = _gvn.type(ex_node)->isa_instptr();
+ NOT_PRODUCT(if (ex_type==NULL) tty->print_cr("*** Exception not InstPtr"));
+ if (ex_type == NULL)
+ ex_type = TypeOopPtr::make_from_klass(env()->Throwable_klass())->is_instptr();
+
+ // determine potential exception handlers
+ ciExceptionHandlerStream handlers(method(), bci(),
+ ex_type->klass()->as_instance_klass(),
+ ex_type->klass_is_exact());
+
+ // Start executing from the given throw state. (Keep its stack, for now.)
+ // Get the exception oop as known at compile time.
+ ex_node = use_exception_state(ex_map);
+
+ // Get the exception oop klass from its header
+ Node* ex_klass_node = NULL;
+ if (has_ex_handler() && !ex_type->klass_is_exact()) {
+ Node* p = basic_plus_adr( ex_node, ex_node, oopDesc::klass_offset_in_bytes());
+ ex_klass_node = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+
+ // Compute the exception klass a little more cleverly.
+ // The obvious solution is to simply do a LoadKlass from the 'ex_node'.
+ // However, if the ex_node is a PhiNode, I'm going to do a LoadKlass for
+ // each arm of the Phi. If I know something clever about the exceptions
+ // I'm loading the class from, I can replace the LoadKlass with the
+ // klass constant for the exception oop.
+ if( ex_node->is_Phi() ) {
+ ex_klass_node = new (C, ex_node->req()) PhiNode( ex_node->in(0), TypeKlassPtr::OBJECT );
+ for( uint i = 1; i < ex_node->req(); i++ ) {
+ Node* p = basic_plus_adr( ex_node->in(i), ex_node->in(i), oopDesc::klass_offset_in_bytes() );
+ Node* k = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS, TypeKlassPtr::OBJECT));
+ ex_klass_node->init_req( i, k );
+ }
+ _gvn.set_type(ex_klass_node, TypeKlassPtr::OBJECT);
+
+ }
+ }
+
+ // Scan the exception table for applicable handlers.
+ // If none, we can call rethrow() and be done!
+ // If precise (loaded with no subklasses), insert a D.S. style
+ // pointer compare to the correct handler and loop back.
+ // If imprecise, switch to the Rethrow VM-call style handling.
+
+ int remaining = handlers.count_remaining();
+
+ // iterate through all entries sequentially
+ for (;!handlers.is_done(); handlers.next()) {
+ // Do nothing if turned off
+ if( !DeutschShiffmanExceptions ) break;
+ ciExceptionHandler* handler = handlers.handler();
+
+ if (handler->is_rethrow()) {
+ // If we fell off the end of the table without finding an imprecise
+ // exception klass (and without finding a generic handler) then we
+ // know this exception is not handled in this method. We just rethrow
+ // the exception into the caller.
+ throw_to_exit(make_exception_state(ex_node));
+ return;
+ }
+
+ // exception handler bci range covers throw_bci => investigate further
+ int handler_bci = handler->handler_bci();
+
+ if (remaining == 1) {
+ push_ex_oop(ex_node); // Push exception oop for handler
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print_cr(" Catching every inline exception bci:%d -> handler_bci:%d", bci(), handler_bci);
+ }
+#endif
+ merge_exception(handler_bci); // jump to handler
+ return; // No more handling to be done here!
+ }
+
+ // %%% The following logic replicates make_from_klass_unique.
+ // TO DO: Replace by a subroutine call. Then generalize
+ // the type check, as noted in the next "%%%" comment.
+
+ ciInstanceKlass* klass = handler->catch_klass();
+ if (UseUniqueSubclasses) {
+ // (We use make_from_klass because it respects UseUniqueSubclasses.)
+ const TypeOopPtr* tp = TypeOopPtr::make_from_klass(klass);
+ klass = tp->klass()->as_instance_klass();
+ }
+
+ // Get the handler's klass
+ if (!klass->is_loaded()) // klass is not loaded?
+ break; // Must call Rethrow!
+ if (klass->is_interface()) // should not happen, but...
+ break; // bail out
+ // See if the loaded exception klass has no subtypes
+ if (klass->has_subklass())
+ break; // Cannot easily do precise test ==> Rethrow
+
+ // %%% Now that subclass checking is very fast, we need to rewrite
+ // this section and remove the option "DeutschShiffmanExceptions".
+ // The exception processing chain should be a normal typecase pattern,
+ // with a bailout to the interpreter only in the case of unloaded
+ // classes. (The bailout should mark the method non-entrant.)
+ // This rewrite should be placed in GraphKit::, not Parse::.
+
+ // Add a dependence; if any subclass added we need to recompile
+ // %%% should use stronger assert_unique_concrete_subtype instead
+ if (!klass->is_final()) {
+ C->dependencies()->assert_leaf_type(klass);
+ }
+
+ // Implement precise test
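+ // The precise test is a pointer compare of the exception's klass against
+ // the handler's klass constant; on a match the oop is cast to the exact
+ // handler type and control merges into the handler below.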
+ const TypeKlassPtr *tk = TypeKlassPtr::make(klass);
+ Node* con = _gvn.makecon(tk);
+ Node* cmp = _gvn.transform( new (C, 3) CmpPNode(ex_klass_node, con) );
+ Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ { BuildCutout unless(this, bol, PROB_LIKELY(0.7f));
+ const TypeInstPtr* tinst = TypeInstPtr::make_exact(TypePtr::NotNull, klass);
+ Node* ex_oop = _gvn.transform(new (C, 2) CheckCastPPNode(control(), ex_node, tinst));
+ push_ex_oop(ex_oop); // Push exception oop for handler
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print(" Catching inline exception bci:%d -> handler_bci:%d -- ", bci(), handler_bci);
+ klass->print_name();
+ tty->cr();
+ }
+#endif
+ merge_exception(handler_bci);
+ }
+
+ // Come here if exception does not match handler.
+ // Carry on with more handler checks.
+ --remaining;
+ }
+
+ assert(!stopped(), "you should return if you finish the chain");
+
+ if (remaining == 1) {
+ // Further checks do not matter.
+ }
+
+ if (can_rerun_bytecode()) {
+ // Do not push_ex_oop here!
+ // Re-executing the bytecode will reproduce the throwing condition.
+ bool must_throw = true;
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none,
+ (ciKlass*)NULL, (const char*)NULL, // default args
+ must_throw);
+ return;
+ }
+
+ // Oops, need to call into the VM to resolve the klasses at runtime.
+ // Note: This call must not deoptimize, since it is not a real call at this bci!
+ kill_dead_locals();
+
+ make_runtime_call(RC_NO_LEAF | RC_MUST_THROW,
+ OptoRuntime::rethrow_Type(),
+ OptoRuntime::rethrow_stub(),
+ NULL, NULL,
+ ex_node);
+
+ // Rethrow is a pure call, no side effects, only a result.
+ // The result cannot be allocated, so we use I_O
+
+ // Catch exceptions from the rethrow
+ catch_call_exceptions(handlers);
+}
+
+
+// (Note: Moved add_debug_info into GraphKit::add_safepoint_edges.)
+
+
+#ifndef PRODUCT
+void Parse::count_compiled_calls(bool at_method_entry, bool is_inline) {
+ if( CountCompiledCalls ) {
+ if( at_method_entry ) {
+ // bump invocation counter if top method (for statistics)
+ if (CountCompiledCalls && depth() == 1) {
+ const TypeInstPtr* addr_type = TypeInstPtr::make(method());
+ Node* adr1 = makecon(addr_type);
+ Node* adr2 = basic_plus_adr(adr1, adr1, in_bytes(methodOopDesc::compiled_invocation_counter_offset()));
+ increment_counter(adr2);
+ }
+ } else if (is_inline) {
+ switch (bc()) {
+ case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_inlined_calls_addr()); break;
+ case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_inlined_interface_calls_addr()); break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_inlined_static_calls_addr()); break;
+ default: fatal("unexpected call bytecode");
+ }
+ } else {
+ switch (bc()) {
+ case Bytecodes::_invokevirtual: increment_counter(SharedRuntime::nof_normal_calls_addr()); break;
+ case Bytecodes::_invokeinterface: increment_counter(SharedRuntime::nof_interface_calls_addr()); break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial: increment_counter(SharedRuntime::nof_static_calls_addr()); break;
+ default: fatal("unexpected call bytecode");
+ }
+ }
+ }
+}
+#endif //PRODUCT
+
+
+// Identify possible target method and inlining style
+ciMethod* Parse::optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod *dest_method, const TypeOopPtr* receiver_type) {
+ // only use for virtual or interface calls
+
+ // If it is obviously final, do not bother to call find_monomorphic_target,
+ // because the class hierarchy checks are not needed, and may fail due to
+ // incompletely loaded classes. Since we do our own class loading checks
+ // in this module, we may confidently bind to any method.
+ if (dest_method->can_be_statically_bound()) {
+ return dest_method;
+ }
+
+ // Attempt to improve the receiver
+ bool actual_receiver_is_exact = false;
+ ciInstanceKlass* actual_receiver = klass;
+ if (receiver_type != NULL) {
+ // Array methods are all inherited from Object, and are monomorphic.
+ if (receiver_type->isa_aryptr() &&
+ dest_method->holder() == env()->Object_klass()) {
+ return dest_method;
+ }
+
+ // All other interesting cases are instance klasses.
+ if (!receiver_type->isa_instptr()) {
+ return NULL;
+ }
+
+ ciInstanceKlass *ikl = receiver_type->klass()->as_instance_klass();
+ if (ikl->is_loaded() && ikl->is_initialized() && !ikl->is_interface() &&
+ (ikl == actual_receiver || ikl->is_subclass_of(actual_receiver))) {
+ // ikl is the same as or a better type than the original actual_receiver,
+ // e.g. static receiver from bytecodes.
+ actual_receiver = ikl;
+ // Is the actual_receiver exact?
+ actual_receiver_is_exact = receiver_type->klass_is_exact();
+ }
+ }
+
+ ciInstanceKlass* calling_klass = caller->holder();
+ ciMethod* cha_monomorphic_target = dest_method->find_monomorphic_target(calling_klass, klass, actual_receiver);
+ if (cha_monomorphic_target != NULL) {
+ assert(!cha_monomorphic_target->is_abstract(), "");
+ // Look at the method-receiver type. Does it add "too much information"?
+ ciKlass* mr_klass = cha_monomorphic_target->holder();
+ const Type* mr_type = TypeInstPtr::make(TypePtr::BotPTR, mr_klass);
+ if (receiver_type == NULL || !receiver_type->higher_equal(mr_type)) {
+ // Calling this method would include an implicit cast to its holder.
+ // %%% Not yet implemented. Would throw minor asserts at present.
+ // %%% The most common wins are already gained by +UseUniqueSubclasses.
+ // To fix, put the higher_equal check at the call of this routine,
+ // and add a CheckCastPP to the receiver.
+ if (TraceDependencies) {
+ tty->print_cr("found unique CHA method, but could not cast up");
+ tty->print(" method = ");
+ cha_monomorphic_target->print();
+ tty->cr();
+ }
+ if (C->log() != NULL) {
+ C->log()->elem("missed_CHA_opportunity klass='%d' method='%d'",
+ C->log()->identify(klass),
+ C->log()->identify(cha_monomorphic_target));
+ }
+ cha_monomorphic_target = NULL;
+ }
+ }
+ if (cha_monomorphic_target != NULL) {
+ // Hardwiring a virtual.
+ // If we inlined because CHA revealed only a single target method,
+ // then we are dependent on that target method not getting overridden
+ // by dynamic class loading. Be sure to test the "static" receiver
+ // dest_method here, as opposed to the actual receiver, which may
+ // falsely lead us to believe that the receiver is final or private.
+ C->dependencies()->assert_unique_concrete_method(actual_receiver, cha_monomorphic_target);
+ return cha_monomorphic_target;
+ }
+
+ // If the type is exact, we can still bind the method w/o a vcall.
+ // (This case comes after CHA so we can see how much extra work it does.)
+ if (actual_receiver_is_exact) {
+ // In case of evolution, there is a dependence on every inlined method, since each
+ // such method can be changed when its class is redefined.
+ ciMethod* exact_method = dest_method->resolve_invoke(calling_klass, actual_receiver);
+ if (exact_method != NULL) {
+#ifndef PRODUCT
+ if (PrintOpto) {
+ tty->print(" Calling method via exact type @%d --- ", bci);
+ exact_method->print_name();
+ tty->cr();
+ }
+#endif
+ return exact_method;
+ }
+ }
+
+ return NULL;
+}
diff --git a/src/share/vm/opto/domgraph.cpp b/src/share/vm/opto/domgraph.cpp
new file mode 100644
index 000000000..2ef02fd0c
--- /dev/null
+++ b/src/share/vm/opto/domgraph.cpp
@@ -0,0 +1,664 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_domgraph.cpp.incl"
+
+//------------------------------Tarjan-----------------------------------------
+// A data structure that holds all the information needed to find dominators.
+struct Tarjan {
+ Block *_block; // Basic block for this info
+
+ uint _semi; // Semi-dominators
+ uint _size; // Used for faster LINK and EVAL
+ Tarjan *_parent; // Parent in DFS
+ Tarjan *_label; // Used for LINK and EVAL
+ Tarjan *_ancestor; // Used for LINK and EVAL
+ Tarjan *_child; // Used for faster LINK and EVAL
+ Tarjan *_dom; // Parent in dominator tree (immediate dom)
+ Tarjan *_bucket; // Set of vertices with given semidominator
+
+ Tarjan *_dom_child; // Child in dominator tree
+ Tarjan *_dom_next; // Next in dominator tree
+
+ // Fast union-find work
+ void COMPRESS();
+ Tarjan *EVAL(void);
+ void LINK( Tarjan *w, Tarjan *tarjan0 );
+
+ void setdepth( uint size );
+
+};
+
+//------------------------------Dominator--------------------------------------
+// Compute the dominator tree of the CFG. The CFG must already have been
+// constructed. This is the Lengauer & Tarjan O(E*alpha(E,V)) algorithm.
+void PhaseCFG::Dominators( ) {
+ // Pre-grow the blocks array, prior to the ResourceMark kicking in
+ _blocks.map(_num_blocks,0);
+
+ ResourceMark rm;
+ // Setup mappings from my Graph to Tarjan's stuff and back
+ // Note: Tarjan uses 1-based arrays
+ Tarjan *tarjan = NEW_RESOURCE_ARRAY(Tarjan,_num_blocks+1);
+
+ // Tarjan's algorithm, almost verbatim:
+ // Step 1:
+ _rpo_ctr = _num_blocks;
+ uint dfsnum = DFS( tarjan );
+ if( dfsnum-1 != _num_blocks ) {// Check for unreachable loops!
+ // If the returned dfsnum does not match the number of blocks, then we
+ // must have some unreachable loops. These can be made at any time by
+ // IterGVN. They are cleaned up by CCP or the loop opts, but the last
+ // IterGVN can always make more that are not cleaned up. Highly unlikely
+ // except in ZKM.jar, where endless irreducible loops cause the loop opts
+ // to not get run.
+ //
+ // Having found unreachable loops, we have made a bad RPO _block layout.
+ // We can re-run the above DFS pass with the correct number of blocks,
+ // and hack the Tarjan algorithm below to be robust in the presence of
+ // such dead loops (as was done for the NTarjan code farther below).
+ // Since this situation is so unlikely, instead I've decided to bail out.
+ // CNC 7/24/2001
+ C->record_method_not_compilable("unreachable loop");
+ return;
+ }
+ _blocks._cnt = _num_blocks;
+
+ // Tarjan uses 1-based arrays, so initialize the 0th element, which serves as a sentinel
+ tarjan[0]._size = tarjan[0]._semi = 0;
+ tarjan[0]._label = &tarjan[0];
+
+ uint i;
+ for( i=_num_blocks; i>=2; i-- ) { // For all vertices in reverse DFS order
+ Tarjan *w = &tarjan[i]; // Get vertex from DFS
+
+ // Step 2:
+ Node *whead = w->_block->head();
+ for( uint j=1; j < whead->req(); j++ ) {
+ Block *b = _bbs[whead->in(j)->_idx];
+ Tarjan *vx = &tarjan[b->_pre_order];
+ Tarjan *u = vx->EVAL();
+ if( u->_semi < w->_semi )
+ w->_semi = u->_semi;
+ }
+
+ // w is added to a bucket here, and only here.
+ // Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
+ // Thus bucket can be a linked list.
+ // Thus we do not need a small integer name for each Block.
+ w->_bucket = tarjan[w->_semi]._bucket;
+ tarjan[w->_semi]._bucket = w;
+
+ w->_parent->LINK( w, &tarjan[0] );
+
+ // Step 3:
+ for( Tarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
+ Tarjan *u = vx->EVAL();
+ vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
+ }
+ }
+
+ // Step 4:
+ for( i=2; i <= _num_blocks; i++ ) {
+ Tarjan *w = &tarjan[i];
+ if( w->_dom != &tarjan[w->_semi] )
+ w->_dom = w->_dom->_dom;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+ }
+ // No immediate dominator for the root
+ Tarjan *w = &tarjan[_broot->_pre_order];
+ w->_dom = NULL;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+
+ // Convert the dominator tree array into my kind of graph
+ for( i=1; i<=_num_blocks;i++){// For all Tarjan vertices
+ Tarjan *t = &tarjan[i]; // Handy access
+ Tarjan *tdom = t->_dom; // Handy access to immediate dominator
+ if( tdom ) { // Root has no immediate dominator
+ t->_block->_idom = tdom->_block; // Set immediate dominator
+ t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
+ tdom->_dom_child = t; // Make me a child of my parent
+ } else
+ t->_block->_idom = NULL; // Root
+ }
+ w->setdepth( _num_blocks+1 ); // Set depth in dominator tree
+
+}
+
+//----------------------------Block_Stack--------------------------------------
+class Block_Stack {
+ private:
+ struct Block_Descr {
+ Block *block; // Block
+ int index; // Index of block's successor pushed on stack
+ int freq_idx; // Index of block's most frequent successor
+ };
+ Block_Descr *_stack_top;
+ Block_Descr *_stack_max;
+ Block_Descr *_stack;
+ Tarjan *_tarjan;
+ uint most_frequent_successor( Block *b );
+ public:
+ Block_Stack(Tarjan *tarjan, int size) : _tarjan(tarjan) {
+ _stack = NEW_RESOURCE_ARRAY(Block_Descr, size);
+ _stack_max = _stack + size;
+ _stack_top = _stack - 1; // stack is empty
+ }
+ void push(uint pre_order, Block *b) {
+ Tarjan *t = &_tarjan[pre_order]; // Fast local access
+ b->_pre_order = pre_order; // Flag as visited
+ t->_block = b; // Save actual block
+ t->_semi = pre_order; // Block to DFS map
+ t->_label = t; // DFS to vertex map
+ t->_ancestor = NULL; // Fast LINK & EVAL setup
+ t->_child = &_tarjan[0]; // Sentinel
+ t->_size = 1;
+ t->_bucket = NULL;
+ if (pre_order == 1)
+ t->_parent = NULL; // first block doesn't have a parent
+ else {
+ // Save parent (current top block on stack) in DFS
+ t->_parent = &_tarjan[_stack_top->block->_pre_order];
+ }
+ // Now put this block on stack
+ ++_stack_top;
+ assert(_stack_top < _stack_max, ""); // assert if the stack would have to grow
+ _stack_top->block = b;
+ _stack_top->index = -1;
+ // Find the index into b->succs[] array of the most frequent successor.
+ _stack_top->freq_idx = most_frequent_successor(b); // freq_idx >= 0
+ }
+ Block* pop() { Block* b = _stack_top->block; _stack_top--; return b; }
+ bool is_nonempty() { return (_stack_top >= _stack); }
+ bool last_successor() { return (_stack_top->index == _stack_top->freq_idx); }
+ Block* next_successor() {
+ int i = _stack_top->index;
+ i++;
+ if (i == _stack_top->freq_idx) i++;
+ if (i >= (int)(_stack_top->block->_num_succs)) {
+ i = _stack_top->freq_idx; // process most frequent successor last
+ }
+ _stack_top->index = i;
+ return _stack_top->block->_succs[ i ];
+ }
+};
+
+//-------------------------most_frequent_successor-----------------------------
+// Find the index into the b->succs[] array of the most frequent successor.
+uint Block_Stack::most_frequent_successor( Block *b ) {
+ uint freq_idx = 0;
+ int eidx = b->end_idx();
+ Node *n = b->_nodes[eidx];
+ int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
+ switch( op ) {
+ case Op_CountedLoopEnd:
+ case Op_If: { // Split frequency amongst children
+ float prob = n->as_MachIf()->_prob;
+ // Is succ[0] the TRUE branch or the FALSE branch?
+ if( b->_nodes[eidx+1]->Opcode() == Op_IfFalse )
+ prob = 1.0f - prob;
+ freq_idx = prob < PROB_FAIR; // freq=1 for succ[0] < 0.5 prob
+ break;
+ }
+ case Op_Catch: // Split frequency amongst children
+ for( freq_idx = 0; freq_idx < b->_num_succs; freq_idx++ )
+ if( b->_nodes[eidx+1+freq_idx]->as_CatchProj()->_con == CatchProjNode::fall_through_index )
+ break;
+ // Handle case of no fall-thru (e.g., check-cast MUST throw an exception)
+ if( freq_idx == b->_num_succs ) freq_idx = 0;
+ break;
+ // Currently there is no support for finding out the most
+ // frequent successor for jumps, so let's just make it the first one
+ case Op_Jump:
+ case Op_Root:
+ case Op_Goto:
+ case Op_NeverBranch:
+ freq_idx = 0; // fall thru
+ break;
+ case Op_TailCall:
+ case Op_TailJump:
+ case Op_Return:
+ case Op_Halt:
+ case Op_Rethrow:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return freq_idx;
+}
+
+//------------------------------DFS--------------------------------------------
+// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
+// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
+uint PhaseCFG::DFS( Tarjan *tarjan ) {
+ Block *b = _broot;
+ uint pre_order = 1;
+ // Allocate stack of size _num_blocks+1 to avoid frequent realloc
+ Block_Stack bstack(tarjan, _num_blocks+1);
+
+ // Push on stack the state for the first block
+ bstack.push(pre_order, b);
+ ++pre_order;
+
+ while (bstack.is_nonempty()) {
+ if (!bstack.last_successor()) {
+ // Walk over all successors in pre-order (DFS).
+ Block *s = bstack.next_successor();
+ if (s->_pre_order == 0) { // Check for no-pre-order, not-visited
+ // Push on stack the state of successor
+ bstack.push(pre_order, s);
+ ++pre_order;
+ }
+ }
+ else {
+ // Build a reverse post-order in the CFG _blocks array
+ Block *stack_top = bstack.pop();
+ stack_top->_rpo = --_rpo_ctr;
+ _blocks.map(stack_top->_rpo, stack_top);
+ }
+ }
+ return pre_order;
+}
+
+//------------------------------COMPRESS---------------------------------------
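+// Path-compress the ancestor chain, keeping the label with the smallest
+// semidominator seen along the way.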
+void Tarjan::COMPRESS()
+{
+ assert( _ancestor != 0, "" );
+ if( _ancestor->_ancestor != 0 ) {
+ _ancestor->COMPRESS( );
+ if( _ancestor->_label->_semi < _label->_semi )
+ _label = _ancestor->_label;
+ _ancestor = _ancestor->_ancestor;
+ }
+}
+
+//------------------------------EVAL-------------------------------------------
+Tarjan *Tarjan::EVAL() {
+ if( !_ancestor ) return _label;
+ COMPRESS();
+ return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
+}
+
+//------------------------------LINK-------------------------------------------
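+// This is the size-balanced LINK of Lengauer & Tarjan's faster variant:
+// _size and _child maintain balanced virtual trees so that EVAL stays cheap.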
+void Tarjan::LINK( Tarjan *w, Tarjan *tarjan0 ) {
+ Tarjan *s = w;
+ while( w->_label->_semi < s->_child->_label->_semi ) {
+ if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
+ s->_child->_ancestor = s;
+ s->_child = s->_child->_child;
+ } else {
+ s->_child->_size = s->_size;
+ s = s->_ancestor = s->_child;
+ }
+ }
+ s->_label = w->_label;
+ _size += w->_size;
+ if( _size < (w->_size << 1) ) {
+ Tarjan *tmp = s; s = _child; _child = tmp;
+ }
+ while( s != tarjan0 ) {
+ s->_ancestor = this;
+ s = s->_child;
+ }
+}
+
+//------------------------------setdepth---------------------------------------
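+// Assign dominator-tree depths breadth-first: the pre-allocated array acts
+// as the worklist, [next,last) holds the current level, and children are
+// appended at 'top' to form the next level.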
+void Tarjan::setdepth( uint stack_size ) {
+ Tarjan **top = NEW_RESOURCE_ARRAY(Tarjan*, stack_size);
+ Tarjan **next = top;
+ Tarjan **last;
+ uint depth = 0;
+ *top = this;
+ ++top;
+ do {
+ // next level
+ ++depth;
+ last = top;
+ do {
+ // Set current depth for all tarjans on this level
+ Tarjan *t = *next; // next tarjan from stack
+ ++next;
+ do {
+ t->_block->_dom_depth = depth; // Set depth in dominator tree
+ Tarjan *dom_child = t->_dom_child;
+ t = t->_dom_next; // next tarjan
+ if (dom_child != NULL) {
+ *top = dom_child; // save child on stack
+ ++top;
+ }
+ } while (t != NULL);
+ } while (next < last);
+ } while (last < top);
+}
+
+//*********************** DOMINATORS ON THE SEA OF NODES***********************
+//------------------------------NTarjan----------------------------------------
+// A data structure that holds all the information needed to find dominators.
+struct NTarjan {
+ Node *_control; // Control node associated with this info
+
+ uint _semi; // Semi-dominators
+ uint _size; // Used for faster LINK and EVAL
+ NTarjan *_parent; // Parent in DFS
+ NTarjan *_label; // Used for LINK and EVAL
+ NTarjan *_ancestor; // Used for LINK and EVAL
+ NTarjan *_child; // Used for faster LINK and EVAL
+ NTarjan *_dom; // Parent in dominator tree (immediate dom)
+ NTarjan *_bucket; // Set of vertices with given semidominator
+
+ NTarjan *_dom_child; // Child in dominator tree
+ NTarjan *_dom_next; // Next in dominator tree
+
+ // Perform DFS search.
+ // Setup 'vertex' as DFS to vertex mapping.
+ // Setup 'semi' as vertex to DFS mapping.
+ // Set 'parent' to DFS parent.
+ static int DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder );
+ void setdepth( uint size, uint *dom_depth );
+
+ // Fast union-find work
+ void COMPRESS();
+ NTarjan *EVAL(void);
+ void LINK( NTarjan *w, NTarjan *ntarjan0 );
+#ifndef PRODUCT
+ void dump(int offset) const;
+#endif
+};
+
+//------------------------------Dominator--------------------------------------
+// Compute the dominator tree of the sea of nodes. This version walks all CFG
+// nodes (using the is_CFG() call) and places them in a dominator tree. Thus,
+// it needs a count of the CFG nodes for the mapping table. This is the
+// Lengauer & Tarjan O(E*alpha(E,V)) algorithm.
+void PhaseIdealLoop::Dominators( ) {
+ ResourceMark rm;
+ // Setup mappings from my Graph to Tarjan's stuff and back
+ // Note: Tarjan uses 1-based arrays
+ NTarjan *ntarjan = NEW_RESOURCE_ARRAY(NTarjan,C->unique()+1);
+ // Initialize _control field for fast reference
+ int i;
+ for( i= C->unique()-1; i>=0; i-- )
+ ntarjan[i]._control = NULL;
+
+ // Store the DFS order for the main loop
+ uint *dfsorder = NEW_RESOURCE_ARRAY(uint,C->unique()+1);
+ memset(dfsorder, max_uint, (C->unique()+1) * sizeof(uint));
+
+ // Tarjan's algorithm, almost verbatim:
+ // Step 1:
+ VectorSet visited(Thread::current()->resource_area());
+ int dfsnum = NTarjan::DFS( ntarjan, visited, this, dfsorder);
+
+ // Tarjan uses 1-based arrays, so initialize the 0th element, which serves as a sentinel
+ ntarjan[0]._size = ntarjan[0]._semi = 0;
+ ntarjan[0]._label = &ntarjan[0];
+
+ for( i = dfsnum-1; i>1; i-- ) { // For all nodes in reverse DFS order
+ NTarjan *w = &ntarjan[i]; // Get Node from DFS
+ assert(w->_control != NULL,"bad DFS walk");
+
+ // Step 2:
+ Node *whead = w->_control;
+ for( uint j=0; j < whead->req(); j++ ) { // For each predecessor
+ if( whead->in(j) == NULL || !whead->in(j)->is_CFG() )
+ continue; // Only process control nodes
+ uint b = dfsorder[whead->in(j)->_idx];
+ if(b == max_uint) continue;
+ NTarjan *vx = &ntarjan[b];
+ NTarjan *u = vx->EVAL();
+ if( u->_semi < w->_semi )
+ w->_semi = u->_semi;
+ }
+
+ // w is added to a bucket here, and only here.
+ // Thus w is in at most one bucket and the sum of all bucket sizes is O(n).
+ // Thus bucket can be a linked list.
+ w->_bucket = ntarjan[w->_semi]._bucket;
+ ntarjan[w->_semi]._bucket = w;
+
+ w->_parent->LINK( w, &ntarjan[0] );
+
+ // Step 3:
+ for( NTarjan *vx = w->_parent->_bucket; vx; vx = vx->_bucket ) {
+ NTarjan *u = vx->EVAL();
+ vx->_dom = (u->_semi < vx->_semi) ? u : w->_parent;
+ }
+
+ // Cleanup any unreachable loops now. Unreachable loops are loops that
+ // flow into the main graph (and hence into ROOT) but are not reachable
+ // from above. Such code is dead, but requires a global pass to detect
+ // it; this global pass was the 'build_loop_tree' pass run just prior.
+ if( whead->is_Region() ) {
+ for( uint i = 1; i < whead->req(); i++ ) {
+ if (!has_node(whead->in(i))) {
+ // Kill dead input path
+ assert( !visited.test(whead->in(i)->_idx),
+ "input with no loop must be dead" );
+ _igvn.hash_delete(whead);
+ whead->del_req(i);
+ _igvn._worklist.push(whead);
+ for (DUIterator_Fast jmax, j = whead->fast_outs(jmax); j < jmax; j++) {
+ Node* p = whead->fast_out(j);
+ if( p->is_Phi() ) {
+ _igvn.hash_delete(p);
+ p->del_req(i);
+ _igvn._worklist.push(p);
+ }
+ }
+ i--; // Rerun same iteration
+ } // End of if dead input path
+ } // End of for all input paths
+ } // End of if whead is a Region
+ } // End of for all Nodes in reverse DFS order
+
+ // Step 4:
+ for( i=2; i < dfsnum; i++ ) { // DFS order
+ NTarjan *w = &ntarjan[i];
+ assert(w->_control != NULL,"Bad DFS walk");
+ if( w->_dom != &ntarjan[w->_semi] )
+ w->_dom = w->_dom->_dom;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+ }
+ // No immediate dominator for the root
+ NTarjan *w = &ntarjan[dfsorder[C->root()->_idx]];
+ w->_dom = NULL;
+ w->_parent = NULL;
+ w->_dom_next = w->_dom_child = NULL; // Initialize for building tree later
+
+ // Convert the dominator tree array into my kind of graph
+ for( i=1; i<dfsnum; i++ ) { // For all Tarjan vertices
+ NTarjan *t = &ntarjan[i]; // Handy access
+ assert(t->_control != NULL,"Bad DFS walk");
+ NTarjan *tdom = t->_dom; // Handy access to immediate dominator
+ if( tdom ) { // Root has no immediate dominator
+ _idom[t->_control->_idx] = tdom->_control; // Set immediate dominator
+ t->_dom_next = tdom->_dom_child; // Make me a sibling of parent's child
+ tdom->_dom_child = t; // Make me a child of my parent
+ } else
+ _idom[C->root()->_idx] = NULL; // Root
+ }
+ w->setdepth( C->unique()+1, _dom_depth ); // Set depth in dominator tree
+ // Pick up the 'top' node as well
+ _idom [C->top()->_idx] = C->root();
+ _dom_depth[C->top()->_idx] = 1;
+
+ // Debug Print of Dominator tree
+ if( PrintDominators ) {
+#ifndef PRODUCT
+ w->dump(0);
+#endif
+ }
+}
+
+//------------------------------DFS--------------------------------------------
+// Perform DFS search. Setup 'vertex' as DFS to vertex mapping. Setup
+// 'semi' as vertex to DFS mapping. Set 'parent' to DFS parent.
+int NTarjan::DFS( NTarjan *ntarjan, VectorSet &visited, PhaseIdealLoop *pil, uint *dfsorder) {
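+ // dfsorder[] does double duty: while a node sits on the stack unvisited,
+ // its slot holds the dfsnum of the CFG node that pushed it (its DFS parent);
+ // once the node is visited the slot is overwritten with its own DFS number.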
+ // Allocate stack of size C->unique()/8 to avoid frequent realloc
+ GrowableArray <Node *> dfstack(pil->C->unique() >> 3);
+ Node *b = pil->C->root();
+ int dfsnum = 1;
+ dfsorder[b->_idx] = dfsnum; // Cache parent's dfsnum for a later use
+ dfstack.push(b);
+
+ while (dfstack.is_nonempty()) {
+ b = dfstack.pop();
+ if( !visited.test_set(b->_idx) ) { // Test node and flag it as visited
+ NTarjan *w = &ntarjan[dfsnum];
+ // Only fully process control nodes
+ w->_control = b; // Save actual node
+ // Use parent's cached dfsnum to identify "Parent in DFS"
+ w->_parent = &ntarjan[dfsorder[b->_idx]];
+ dfsorder[b->_idx] = dfsnum; // Save DFS order info
+ w->_semi = dfsnum; // Node to DFS map
+ w->_label = w; // DFS to vertex map
+ w->_ancestor = NULL; // Fast LINK & EVAL setup
+ w->_child = &ntarjan[0]; // Sentinel
+ w->_size = 1;
+ w->_bucket = NULL;
+
+ // Need DEF-USE info for this pass
+ for ( int i = b->outcnt(); i-- > 0; ) { // Put on stack backwards
+ Node* s = b->raw_out(i); // Get a use
+ // CFG nodes only and not dead stuff
+ if( s->is_CFG() && pil->has_node(s) && !visited.test(s->_idx) ) {
+ dfsorder[s->_idx] = dfsnum; // Cache parent's dfsnum for a later use
+ dfstack.push(s);
+ }
+ }
+ dfsnum++; // update after parent's dfsnum has been cached.
+ }
+ }
+
+ return dfsnum;
+}
+
+//------------------------------COMPRESS---------------------------------------
+void NTarjan::COMPRESS()
+{
+ assert( _ancestor != 0, "" );
+ if( _ancestor->_ancestor != 0 ) {
+ _ancestor->COMPRESS( );
+ if( _ancestor->_label->_semi < _label->_semi )
+ _label = _ancestor->_label;
+ _ancestor = _ancestor->_ancestor;
+ }
+}
+
+//------------------------------EVAL-------------------------------------------
+NTarjan *NTarjan::EVAL() {
+ if( !_ancestor ) return _label;
+ COMPRESS();
+ return (_ancestor->_label->_semi >= _label->_semi) ? _label : _ancestor->_label;
+}
+
+//------------------------------LINK-------------------------------------------
+void NTarjan::LINK( NTarjan *w, NTarjan *ntarjan0 ) {
+ NTarjan *s = w;
+ while( w->_label->_semi < s->_child->_label->_semi ) {
+ if( s->_size + s->_child->_child->_size >= (s->_child->_size << 1) ) {
+ s->_child->_ancestor = s;
+ s->_child = s->_child->_child;
+ } else {
+ s->_child->_size = s->_size;
+ s = s->_ancestor = s->_child;
+ }
+ }
+ s->_label = w->_label;
+ _size += w->_size;
+ if( _size < (w->_size << 1) ) {
+ NTarjan *tmp = s; s = _child; _child = tmp;
+ }
+ while( s != ntarjan0 ) {
+ s->_ancestor = this;
+ s = s->_child;
+ }
+}
+
+//------------------------------setdepth---------------------------------------
+void NTarjan::setdepth( uint stack_size, uint *dom_depth ) {
+ NTarjan **top = NEW_RESOURCE_ARRAY(NTarjan*, stack_size);
+ NTarjan **next = top;
+ NTarjan **last;
+ uint depth = 0;
+ *top = this;
+ ++top;
+ do {
+ // next level
+ ++depth;
+ last = top;
+ do {
+ // Set current depth for all tarjans on this level
+ NTarjan *t = *next; // next tarjan from stack
+ ++next;
+ do {
+ dom_depth[t->_control->_idx] = depth; // Set depth in dominator tree
+ NTarjan *dom_child = t->_dom_child;
+ t = t->_dom_next; // next tarjan
+ if (dom_child != NULL) {
+ *top = dom_child; // save child on stack
+ ++top;
+ }
+ } while (t != NULL);
+ } while (next < last);
+ } while (last < top);
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void NTarjan::dump(int offset) const {
+ // Dump the data from this node
+ int i;
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("Dominator Node: ");
+ _control->dump(); // Control node for this dom node
+ tty->print("\n");
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("semi:%d, size:%d\n",_semi, _size);
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("DFS Parent: ");
+ if(_parent != NULL)
+ _parent->_control->dump(); // Parent in DFS
+ tty->print("\n");
+ for(i = offset; i >0; i--) // Use indenting for tree structure
+ tty->print(" ");
+ tty->print("Dom Parent: ");
+ if(_dom != NULL)
+ _dom->_control->dump(); // Parent in Dominator Tree
+ tty->print("\n");
+
+ // Recurse over remaining tree
+ if( _dom_child ) _dom_child->dump(offset+2); // Children in dominator tree
+ if( _dom_next ) _dom_next ->dump(offset ); // Siblings in dominator tree
+
+}
+#endif
diff --git a/src/share/vm/opto/escape.cpp b/src/share/vm/opto/escape.cpp
new file mode 100644
index 000000000..62d0c0f1a
--- /dev/null
+++ b/src/share/vm/opto/escape.cpp
@@ -0,0 +1,1346 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_escape.cpp.incl"
+
+uint PointsToNode::edge_target(uint e) const {
+ assert(_edges != NULL && e < (uint)_edges->length(), "valid edge index");
+ return (_edges->at(e) >> EdgeShift);
+}
+
+PointsToNode::EdgeType PointsToNode::edge_type(uint e) const {
+ assert(_edges != NULL && e < (uint)_edges->length(), "valid edge index");
+ return (EdgeType) (_edges->at(e) & EdgeMask);
+}
+
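+// Each edge is packed into a single uint as (target index << EdgeShift) | type,
+// so one growable array per node records both the target and the edge kind.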
+void PointsToNode::add_edge(uint targIdx, PointsToNode::EdgeType et) {
+ uint v = (targIdx << EdgeShift) + ((uint) et);
+ if (_edges == NULL) {
+ Arena *a = Compile::current()->comp_arena();
+ _edges = new(a) GrowableArray<uint>(a, INITIAL_EDGE_COUNT, 0, 0);
+ }
+ _edges->append_if_missing(v);
+}
+
+void PointsToNode::remove_edge(uint targIdx, PointsToNode::EdgeType et) {
+ uint v = (targIdx << EdgeShift) + ((uint) et);
+
+ _edges->remove(v);
+}
+
+#ifndef PRODUCT
+static char *node_type_names[] = {
+ "UnknownType",
+ "JavaObject",
+ "LocalVar",
+ "Field"
+};
+
+static char *esc_names[] = {
+ "UnknownEscape",
+ "NoEscape ",
+ "ArgEscape ",
+ "GlobalEscape "
+};
+
+static char *edge_type_suffix[] = {
+ "?", // UnknownEdge
+ "P", // PointsToEdge
+ "D", // DeferredEdge
+ "F" // FieldEdge
+};
+
+void PointsToNode::dump() const {
+ NodeType nt = node_type();
+ EscapeState es = escape_state();
+ tty->print("%s %s [[", node_type_names[(int) nt], esc_names[(int) es]);
+ for (uint i = 0; i < edge_count(); i++) {
+ tty->print(" %d%s", edge_target(i), edge_type_suffix[(int) edge_type(i)]);
+ }
+ tty->print("]] ");
+ if (_node == NULL)
+ tty->print_cr("<null>");
+ else
+ _node->dump();
+}
+#endif
+
+ConnectionGraph::ConnectionGraph(Compile * C) : _processed(C->comp_arena()), _node_map(C->comp_arena()) {
+ _collecting = true;
+ this->_compile = C;
+ const PointsToNode &dummy = PointsToNode();
+ _nodes = new(C->comp_arena()) GrowableArray<PointsToNode>(C->comp_arena(), (int) INITIAL_NODE_COUNT, 0, dummy);
+ _phantom_object = C->top()->_idx;
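+ // The phantom object stands for values that flow in from outside this
+ // compilation (see PointsTo); it is keyed by the index of C->top() and
+ // treated as a GlobalEscape JavaObject.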
+ PointsToNode *phn = ptnode_adr(_phantom_object);
+ phn->set_node_type(PointsToNode::JavaObject);
+ phn->set_escape_state(PointsToNode::GlobalEscape);
+}
+
+void ConnectionGraph::add_pointsto_edge(uint from_i, uint to_i) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of PointsTo edge");
+ assert(t->node_type() == PointsToNode::JavaObject, "invalid destination of PointsTo edge");
+ f->add_edge(to_i, PointsToNode::PointsToEdge);
+}
+
+void ConnectionGraph::add_deferred_edge(uint from_i, uint to_i) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::LocalVar || f->node_type() == PointsToNode::Field, "invalid source of Deferred edge");
+ assert(t->node_type() == PointsToNode::LocalVar || t->node_type() == PointsToNode::Field, "invalid destination of Deferred edge");
+ // don't add a self-referential edge, this can occur during removal of
+ // deferred edges
+ if (from_i != to_i)
+ f->add_edge(to_i, PointsToNode::DeferredEdge);
+}
+
+int ConnectionGraph::type_to_offset(const Type *t) {
+ const TypePtr *t_ptr = t->isa_ptr();
+ assert(t_ptr != NULL, "must be a pointer type");
+ return t_ptr->offset();
+}
+
+void ConnectionGraph::add_field_edge(uint from_i, uint to_i, int offset) {
+ PointsToNode *f = ptnode_adr(from_i);
+ PointsToNode *t = ptnode_adr(to_i);
+
+ assert(f->node_type() != PointsToNode::UnknownType && t->node_type() != PointsToNode::UnknownType, "node types must be set");
+ assert(f->node_type() == PointsToNode::JavaObject, "invalid source of Field edge");
+ assert(t->node_type() == PointsToNode::Field, "invalid destination of Field edge");
+ assert (t->offset() == -1 || t->offset() == offset, "conflicting field offsets");
+ t->set_offset(offset);
+
+ f->add_edge(to_i, PointsToNode::FieldEdge);
+}
+
+void ConnectionGraph::set_escape_state(uint ni, PointsToNode::EscapeState es) {
+ PointsToNode *npt = ptnode_adr(ni);
+ PointsToNode::EscapeState old_es = npt->escape_state();
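+ // Escape states are ordered (NoEscape < ArgEscape < GlobalEscape), so a
+ // node's state only ever moves up the lattice.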
+ if (es > old_es)
+ npt->set_escape_state(es);
+}
+
+PointsToNode::EscapeState ConnectionGraph::escape_state(Node *n, PhaseTransform *phase) {
+ uint idx = n->_idx;
+ PointsToNode::EscapeState es;
+
+ // If we are still collecting we don't know the answer yet
+ if (_collecting)
+ return PointsToNode::UnknownEscape;
+
+ // if the node was created after the escape computation, return
+ // UnknownEscape
+ if (idx >= (uint)_nodes->length())
+ return PointsToNode::UnknownEscape;
+
+ es = _nodes->at_grow(idx).escape_state();
+
+ // if we have already computed a value, return it
+ if (es != PointsToNode::UnknownEscape)
+ return es;
+
+ // compute max escape state of anything this node could point to
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, n, phase);
+ for( VectorSetI i(&ptset); i.test() && es != PointsToNode::GlobalEscape; ++i ) {
+ uint pt = i.elem;
+ PointsToNode::EscapeState pes = _nodes->at(pt).escape_state();
+ if (pes > es)
+ es = pes;
+ }
+ // cache the computed escape state
+ assert(es != PointsToNode::UnknownEscape, "should have computed an escape state");
+ _nodes->adr_at(idx)->set_escape_state(es);
+ return es;
+}
+
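+//
+// Walk the connection graph starting at "n" and add to "ptset" the index of
+// every JavaObject reachable through PointsTo edges, following Deferred edges
+// with a worklist. Informally, for LV1 -D> LV2 -P> JO this yields
+// PointsTo(LV1) = { JO }. A node with no PointsTo or Deferred edges is
+// assumed to have been set outside this method, so _phantom_object is added.
+//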
+void ConnectionGraph::PointsTo(VectorSet &ptset, Node * n, PhaseTransform *phase) {
+ VectorSet visited(Thread::current()->resource_area());
+ GrowableArray<uint> worklist;
+
+ n = skip_casts(n);
+ PointsToNode npt = _nodes->at_grow(n->_idx);
+
+ // If we have a JavaObject, return just that object
+ if (npt.node_type() == PointsToNode::JavaObject) {
+ ptset.set(n->_idx);
+ return;
+ }
+ // we may have a Phi which has not been processed
+ if (npt._node == NULL) {
+ assert(n->is_Phi(), "unprocessed node must be a Phi");
+ record_for_escape_analysis(n);
+ npt = _nodes->at(n->_idx);
+ }
+ worklist.push(n->_idx);
+ while(worklist.length() > 0) {
+ int ni = worklist.pop();
+ PointsToNode pn = _nodes->at_grow(ni);
+ if (!visited.test(ni)) {
+ visited.set(ni);
+
+ // ensure that all inputs of a Phi have been processed
+ if (_collecting && pn._node->is_Phi()) {
+ PhiNode *phi = pn._node->as_Phi();
+ process_phi_escape(phi, phase);
+ }
+
+ int edges_processed = 0;
+ for (uint e = 0; e < pn.edge_count(); e++) {
+ PointsToNode::EdgeType et = pn.edge_type(e);
+ if (et == PointsToNode::PointsToEdge) {
+ ptset.set(pn.edge_target(e));
+ edges_processed++;
+ } else if (et == PointsToNode::DeferredEdge) {
+ worklist.push(pn.edge_target(e));
+ edges_processed++;
+ }
+ }
+ if (edges_processed == 0) {
+ // no deferred or pointsto edges found. Assume the value was set outside
+ // this method. Add the phantom object to the pointsto set.
+ ptset.set(_phantom_object);
+ }
+ }
+ }
+}
+
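+//
+// Remove the outgoing Deferred edges of the node "ni" and replace them with
+// copies of the target's own edges. Informally, using the notation from
+// escape.hpp, LV1 -D> LV2 -P> JO becomes LV1 -P> JO.
+//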
+void ConnectionGraph::remove_deferred(uint ni) {
+ VectorSet visited(Thread::current()->resource_area());
+
+ uint i = 0;
+ PointsToNode *ptn = ptnode_adr(ni);
+
+ while(i < ptn->edge_count()) {
+ if (ptn->edge_type(i) != PointsToNode::DeferredEdge) {
+ i++;
+ } else {
+ uint t = ptn->edge_target(i);
+ PointsToNode *ptt = ptnode_adr(t);
+ ptn->remove_edge(t, PointsToNode::DeferredEdge);
+ if(!visited.test(t)) {
+ visited.set(t);
+ for (uint j = 0; j < ptt->edge_count(); j++) {
+ uint n1 = ptt->edge_target(j);
+ PointsToNode *pt1 = ptnode_adr(n1);
+ switch(ptt->edge_type(j)) {
+ case PointsToNode::PointsToEdge:
+ add_pointsto_edge(ni, n1);
+ break;
+ case PointsToNode::DeferredEdge:
+ add_deferred_edge(ni, n1);
+ break;
+ case PointsToNode::FieldEdge:
+ assert(false, "invalid connection graph");
+ break;
+ }
+ }
+ }
+ }
+ }
+}
+
+
+// Add an edge to the node given by "to_i" from any field of adr_i whose offset
+// matches "offset". A deferred edge is added if to_i is a LocalVar, and
+// a pointsto edge is added if it is a JavaObject.
+
+void ConnectionGraph::add_edge_from_fields(uint adr_i, uint to_i, int offs) {
+ PointsToNode an = _nodes->at_grow(adr_i);
+ PointsToNode to = _nodes->at_grow(to_i);
+ bool deferred = (to.node_type() == PointsToNode::LocalVar);
+
+ for (uint fe = 0; fe < an.edge_count(); fe++) {
+ assert(an.edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
+ int fi = an.edge_target(fe);
+ PointsToNode pf = _nodes->at_grow(fi);
+ int po = pf.offset();
+ if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
+ if (deferred)
+ add_deferred_edge(fi, to_i);
+ else
+ add_pointsto_edge(fi, to_i);
+ }
+ }
+}
+
+// Add a deferred edge from the node given by "from_i" to any field of adr_i whose offset
+// matches "offset".
+void ConnectionGraph::add_deferred_edge_to_fields(uint from_i, uint adr_i, int offs) {
+ PointsToNode an = _nodes->at_grow(adr_i);
+ for (uint fe = 0; fe < an.edge_count(); fe++) {
+ assert(an.edge_type(fe) == PointsToNode::FieldEdge, "expecting a field edge");
+ int fi = an.edge_target(fe);
+ PointsToNode pf = _nodes->at_grow(fi);
+ int po = pf.offset();
+ if (pf.edge_count() == 0) {
+ // we have not seen any stores to this field, assume it was set outside this method
+ add_pointsto_edge(fi, _phantom_object);
+ }
+ if (po == offs || po == Type::OffsetBot || offs == Type::OffsetBot) {
+ add_deferred_edge(from_i, fi);
+ }
+ }
+}
+
+//
+// Search memory chain of "mem" to find a MemNode whose address
+// is the specified alias index. Returns the MemNode found or the
+// first non-MemNode encountered.
+//
+Node *ConnectionGraph::find_mem(Node *mem, int alias_idx, PhaseGVN *igvn) {
+ if (mem == NULL)
+ return mem;
+ while (mem->is_Mem()) {
+ const Type *at = igvn->type(mem->in(MemNode::Address));
+ if (at != Type::TOP) {
+ assert (at->isa_ptr() != NULL, "pointer type required.");
+ int idx = _compile->get_alias_index(at->is_ptr());
+ if (idx == alias_idx)
+ break;
+ }
+ mem = mem->in(MemNode::Memory);
+ }
+ return mem;
+}
+
+//
+// Adjust the type and inputs of an AddP which computes the
+// address of a field of an instance
+//
+void ConnectionGraph::split_AddP(Node *addp, Node *base, PhaseGVN *igvn) {
+ const TypeOopPtr *t = igvn->type(addp)->isa_oopptr();
+ const TypeOopPtr *base_t = igvn->type(base)->isa_oopptr();
+ assert(t != NULL, "expecting oopptr");
+ assert(base_t != NULL && base_t->is_instance(), "expecting instance oopptr");
+ uint inst_id = base_t->instance_id();
+ assert(!t->is_instance() || t->instance_id() == inst_id,
+ "old type must be non-instance or match new type");
+ const TypeOopPtr *tinst = base_t->add_offset(t->offset())->is_oopptr();
+ // ensure an alias index is allocated for the instance type
+ int alias_idx = _compile->get_alias_index(tinst);
+ igvn->set_type(addp, tinst);
+ // record the allocation in the node map
+ set_map(addp->_idx, get_map(base->_idx));
+ // if the Address input is not the appropriate instance type (due to
+ // intervening casts), insert a cast
+ Node *adr = addp->in(AddPNode::Address);
+ const TypeOopPtr *atype = igvn->type(adr)->isa_oopptr();
+ if (atype->instance_id() != inst_id) {
+ assert(!atype->is_instance(), "no conflicting instances");
+ const TypeOopPtr *new_atype = base_t->add_offset(atype->offset())->isa_oopptr();
+ Node *acast = new (_compile, 2) CastPPNode(adr, new_atype);
+ acast->set_req(0, adr->in(0));
+ igvn->set_type(acast, new_atype);
+ record_for_optimizer(acast);
+ Node *bcast = acast;
+ Node *abase = addp->in(AddPNode::Base);
+ if (abase != adr) {
+ bcast = new (_compile, 2) CastPPNode(abase, base_t);
+ bcast->set_req(0, abase->in(0));
+ igvn->set_type(bcast, base_t);
+ record_for_optimizer(bcast);
+ }
+ igvn->hash_delete(addp);
+ addp->set_req(AddPNode::Base, bcast);
+ addp->set_req(AddPNode::Address, acast);
+ igvn->hash_insert(addp);
+ record_for_optimizer(addp);
+ }
+}
+
+//
+// Create a new version of orig_phi if necessary. Returns either the newly
+// created phi or an existing phi. Sets new_created to indicate whether a new
+// phi was created. Caches the last newly created phi in the node map.
+//
+PhiNode *ConnectionGraph::create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn, bool &new_created) {
+ Compile *C = _compile;
+ new_created = false;
+ int phi_alias_idx = C->get_alias_index(orig_phi->adr_type());
+ // nothing to do if orig_phi is bottom memory or matches alias_idx
+ if (phi_alias_idx == Compile::AliasIdxBot || phi_alias_idx == alias_idx) {
+ return orig_phi;
+ }
+ // have we already created a Phi for this alias index?
+ PhiNode *result = get_map_phi(orig_phi->_idx);
+ const TypePtr *atype = C->get_adr_type(alias_idx);
+ if (result != NULL && C->get_alias_index(result->adr_type()) == alias_idx) {
+ return result;
+ }
+
+ orig_phi_worklist.append_if_missing(orig_phi);
+ result = PhiNode::make(orig_phi->in(0), NULL, Type::MEMORY, atype);
+ set_map_phi(orig_phi->_idx, result);
+ igvn->set_type(result, result->bottom_type());
+ record_for_optimizer(result);
+ new_created = true;
+ return result;
+}
+
+//
+// Return a new version of Memory Phi "orig_phi" with the inputs having the
+// specified alias index.
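+// The chain of memory Phis reachable through the inputs is walked iteratively
+// with explicit worklists (phi_list/cur_input) rather than by recursion, so
+// long Phi chains do not grow the native stack.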
+//
+PhiNode *ConnectionGraph::split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn) {
+
+ assert(alias_idx != Compile::AliasIdxBot, "can't split out bottom memory");
+ Compile *C = _compile;
+ bool new_phi_created;
+ PhiNode *result = create_split_phi(orig_phi, alias_idx, orig_phi_worklist, igvn, new_phi_created);
+ if (!new_phi_created) {
+ return result;
+ }
+
+ GrowableArray<PhiNode *> phi_list;
+ GrowableArray<uint> cur_input;
+
+ PhiNode *phi = orig_phi;
+ uint idx = 1;
+ bool finished = false;
+ while(!finished) {
+ while (idx < phi->req()) {
+ Node *mem = find_mem(phi->in(idx), alias_idx, igvn);
+ if (mem != NULL && mem->is_Phi()) {
+ PhiNode *nphi = create_split_phi(mem->as_Phi(), alias_idx, orig_phi_worklist, igvn, new_phi_created);
+ if (new_phi_created) {
+ // found a Phi for which we created a new split; push the current one on the
+ // worklist and begin processing the new one
+ phi_list.push(phi);
+ cur_input.push(idx);
+ phi = mem->as_Phi();
+ result = nphi;
+ idx = 1;
+ continue;
+ } else {
+ mem = nphi;
+ }
+ }
+ result->set_req(idx++, mem);
+ }
+#ifdef ASSERT
+ // verify that the new Phi has an input for each input of the original
+ assert( phi->req() == result->req(), "must have same number of inputs.");
+ assert( result->in(0) != NULL && result->in(0) == phi->in(0), "regions must match");
+ for (uint i = 1; i < phi->req(); i++) {
+ assert((phi->in(i) == NULL) == (result->in(i) == NULL), "inputs must correspond.");
+ }
+#endif
+ // we have finished processing a Phi, see if there are any more to do
+ finished = (phi_list.length() == 0 );
+ if (!finished) {
+ phi = phi_list.pop();
+ idx = cur_input.pop();
+ PhiNode *prev_phi = get_map_phi(phi->_idx);
+ prev_phi->set_req(idx++, result);
+ result = prev_phi;
+ }
+ }
+ return result;
+}
+
+//
+// Convert the types of unescaped object to instance types where possible,
+// propagate the new type information through the graph, and update memory
+// edges and MergeMem inputs to reflect the new type.
+//
+// We start with allocations (and calls which may be allocations) on alloc_worklist.
+// The processing is done in 4 phases:
+//
+// Phase 1: Process possible allocations from alloc_worklist. Create instance
+// types for the CheckCastPP for allocations where possible.
+// Propagate the new types through users as follows:
+// casts and Phi: push users on alloc_worklist
+// AddP: cast Base and Address inputs to the instance type
+// push any AddP users on alloc_worklist and push any memnode
+// users onto memnode_worklist.
+// Phase 2: Process MemNodes from memnode_worklist. Compute the new address
+// type and search the Memory chain for a store with the appropriate
+// address type. If a Phi is found, create a new version with
+// the appropriate memory slices from each of the Phi inputs.
+// For stores, process the users as follows:
+// MemNode: push on memnode_worklist
+// MergeMem: push on mergemem_worklist
+// Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice
+// moving the first node encountered of each instance type to the
+// input corresponding to its alias index.
+// Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes.
+//
+// In the following example, the CheckCastPP nodes are the casts of allocation
+// results, and the allocation of node 29 is unescaped and eligible to be an
+// instance type.
+//
+// We start with:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo"
+// 30 AddP _ 29 29 10 Foo+12 alias_index=4
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 40 30 ... alias_index=4
+// 60 StoreP 45 50 20 ... alias_index=4
+// 70 LoadP _ 60 30 ... alias_index=4
+// 80 Phi 75 50 60 Memory alias_index=4
+// 90 LoadP _ 80 30 ... alias_index=4
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+//
+// Phase 1 creates an instance type for node 29 assigning it an instance id of 24
+// and creating a new alias index for node 30. This gives:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo" iid=24
+// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 40 30 ... alias_index=6
+// 60 StoreP 45 50 20 ... alias_index=4
+// 70 LoadP _ 60 30 ... alias_index=6
+// 80 Phi 75 50 60 Memory alias_index=4
+// 90 LoadP _ 80 30 ... alias_index=6
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+// In phase 2, new memory inputs are computed for the loads and stores,
+// and a new version of the phi is created. In phase 4, the inputs to
+// node 80 are updated and then the memory nodes are updated with the
+// values computed in phase 2. This results in:
+//
+// 7 Parm #memory
+// 10 ConI "12"
+// 19 CheckCastPP "Foo"
+// 20 AddP _ 19 19 10 Foo+12 alias_index=4
+// 29 CheckCastPP "Foo" iid=24
+// 30 AddP _ 29 29 10 Foo+12 alias_index=6 iid=24
+//
+// 40 StoreP 25 7 20 ... alias_index=4
+// 50 StoreP 35 7 30 ... alias_index=6
+// 60 StoreP 45 40 20 ... alias_index=4
+// 70 LoadP _ 50 30 ... alias_index=6
+// 80 Phi 75 40 60 Memory alias_index=4
+// 120 Phi 75 50 50 Memory alias_index=6
+// 90 LoadP _ 120 30 ... alias_index=6
+// 100 LoadP _ 80 20 ... alias_index=4
+//
+void ConnectionGraph::split_unique_types(GrowableArray<Node *> &alloc_worklist) {
+ GrowableArray<Node *> memnode_worklist;
+ GrowableArray<Node *> mergemem_worklist;
+ GrowableArray<PhiNode *> orig_phis;
+ PhaseGVN *igvn = _compile->initial_gvn();
+ uint new_index_start = (uint) _compile->num_alias_types();
+ VectorSet visited(Thread::current()->resource_area());
+ VectorSet ptset(Thread::current()->resource_area());
+
+ // Phase 1: Process possible allocations from alloc_worklist. Create instance
+ // types for the CheckCastPP for allocations where possible.
+ while (alloc_worklist.length() != 0) {
+ Node *n = alloc_worklist.pop();
+ uint ni = n->_idx;
+ if (n->is_Call()) {
+ CallNode *alloc = n->as_Call();
+ // copy escape information to call node
+ PointsToNode ptn = _nodes->at(alloc->_idx);
+ PointsToNode::EscapeState es = escape_state(alloc, igvn);
+ alloc->_escape_state = es;
+ // find CheckCastPP of call return value
+ n = alloc->proj_out(TypeFunc::Parms);
+ if (n != NULL && n->outcnt() == 1) {
+ n = n->unique_out();
+ if (n->Opcode() != Op_CheckCastPP) {
+ continue;
+ }
+ } else {
+ continue;
+ }
+ // we have an allocation or call which returns a Java object, see if it is unescaped
+ if (es != PointsToNode::NoEscape || !ptn._unique_type) {
+ continue; // can't make a unique type
+ }
+ set_map(alloc->_idx, n);
+ set_map(n->_idx, alloc);
+ const TypeInstPtr *t = igvn->type(n)->isa_instptr();
+ // Unique types which are arrays are not currently supported.
+ // The check for AllocateArray is needed in case an array
+ // allocation is immediately cast to Object
+ if (t == NULL || alloc->is_AllocateArray())
+ continue; // not a TypeInstPtr
+ const TypeOopPtr *tinst = t->cast_to_instance(ni);
+ igvn->hash_delete(n);
+ igvn->set_type(n, tinst);
+ n->raise_bottom_type(tinst);
+ igvn->hash_insert(n);
+ } else if (n->is_AddP()) {
+ ptset.Clear();
+ PointsTo(ptset, n->in(AddPNode::Address), igvn);
+ assert(ptset.Size() == 1, "AddP address is unique");
+ Node *base = get_map(ptset.getelem());
+ split_AddP(n, base, igvn);
+ } else if (n->is_Phi() || n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) {
+ if (visited.test_set(n->_idx)) {
+ assert(n->is_Phi(), "loops only through Phi's");
+ continue; // already processed
+ }
+ ptset.Clear();
+ PointsTo(ptset, n, igvn);
+ if (ptset.Size() == 1) {
+ TypeNode *tn = n->as_Type();
+ Node *val = get_map(ptset.getelem());
+ const TypeInstPtr *val_t = igvn->type(val)->isa_instptr();
+ assert(val_t != NULL && val_t->is_instance(), "instance type expected.");
+ const TypeInstPtr *tn_t = igvn->type(tn)->isa_instptr();
+
+ if (tn_t != NULL && val_t->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE)->higher_equal(tn_t)) {
+ igvn->hash_delete(tn);
+ igvn->set_type(tn, val_t);
+ tn->set_type(val_t);
+ igvn->hash_insert(tn);
+ }
+ }
+ } else {
+ continue;
+ }
+ // push users on appropriate worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if(use->is_Mem() && use->in(MemNode::Address) == n) {
+ memnode_worklist.push(use);
+ } else if (use->is_AddP() || use->is_Phi() || use->Opcode() == Op_CastPP || use->Opcode() == Op_CheckCastPP) {
+ alloc_worklist.push(use);
+ }
+ }
+
+ }
+ uint new_index_end = (uint) _compile->num_alias_types();
+
+ // Phase 2: Process MemNodes from memnode_worklist. Compute new address types and
+ // new values for Memory inputs (the Memory inputs are not
+ // actually updated until phase 4).
+ if (memnode_worklist.length() == 0)
+ return; // nothing to do
+
+
+ while (memnode_worklist.length() != 0) {
+ Node *n = memnode_worklist.pop();
+ if (n->is_Phi()) {
+ assert(n->as_Phi()->adr_type() != TypePtr::BOTTOM, "narrow memory slice required");
+ // we don't need to do anything, but the users must be pushed if we haven't processed
+ // this Phi before
+ if (visited.test_set(n->_idx))
+ continue;
+ } else {
+ assert(n->is_Mem(), "memory node required.");
+ Node *addr = n->in(MemNode::Address);
+ const Type *addr_t = igvn->type(addr);
+ if (addr_t == Type::TOP)
+ continue;
+ assert (addr_t->isa_ptr() != NULL, "pointer type required.");
+ int alias_idx = _compile->get_alias_index(addr_t->is_ptr());
+ Node *mem = find_mem(n->in(MemNode::Memory), alias_idx, igvn);
+ if (mem->is_Phi()) {
+ mem = split_memory_phi(mem->as_Phi(), alias_idx, orig_phis, igvn);
+ }
+ if (mem != n->in(MemNode::Memory))
+ set_map(n->_idx, mem);
+ if (n->is_Load()) {
+ continue; // don't push users
+ } else if (n->is_LoadStore()) {
+ // get the memory projection
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (use->Opcode() == Op_SCMemProj) {
+ n = use;
+ break;
+ }
+ }
+ assert(n->Opcode() == Op_SCMemProj, "memory projection required");
+ }
+ }
+ // push user on appropriate worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (use->is_Phi()) {
+ memnode_worklist.push(use);
+ } else if(use->is_Mem() && use->in(MemNode::Memory) == n) {
+ memnode_worklist.push(use);
+ } else if (use->is_MergeMem()) {
+ mergemem_worklist.push(use);
+ }
+ }
+ }
+
+ // Phase 3: Process MergeMem nodes from mergemem_worklist. Walk each memory slice
+ // moving the first node encountered of each instance type to
+ // the input corresponding to its alias index.
+ while (mergemem_worklist.length() != 0) {
+ Node *n = mergemem_worklist.pop();
+ assert(n->is_MergeMem(), "MergeMem node required.");
+ MergeMemNode *nmm = n->as_MergeMem();
+ // Note: we don't want to use MergeMemStream here because we only want to
+ // scan inputs which exist at the start, not ones we add during processing
+ uint nslices = nmm->req();
+ igvn->hash_delete(nmm);
+ for (uint i = Compile::AliasIdxRaw+1; i < nslices; i++) {
+ Node * mem = nmm->in(i);
+ Node * cur = NULL;
+ if (mem == NULL || mem->is_top())
+ continue;
+ while (mem->is_Mem()) {
+ const Type *at = igvn->type(mem->in(MemNode::Address));
+ if (at != Type::TOP) {
+ assert (at->isa_ptr() != NULL, "pointer type required.");
+ uint idx = (uint)_compile->get_alias_index(at->is_ptr());
+ if (idx == i) {
+ if (cur == NULL)
+ cur = mem;
+ } else {
+ if (idx >= nmm->req() || nmm->is_empty_memory(nmm->in(idx))) {
+ nmm->set_memory_at(idx, mem);
+ }
+ }
+ }
+ mem = mem->in(MemNode::Memory);
+ }
+ nmm->set_memory_at(i, (cur != NULL) ? cur : mem);
+ if (mem->is_Phi()) {
+ // We have encountered a Phi; we need to split it for
+ // any instance of the current type if we haven't encountered
+ // a value of that instance along the chain.
+ for (uint ni = new_index_start; ni < new_index_end; ni++) {
+ if((uint)_compile->get_general_index(ni) == i) {
+ Node *m = (ni >= nmm->req()) ? nmm->empty_memory() : nmm->in(ni);
+ if (nmm->is_empty_memory(m)) {
+ nmm->set_memory_at(ni, split_memory_phi(mem->as_Phi(), ni, orig_phis, igvn));
+ }
+ }
+ }
+ }
+ }
+ igvn->hash_insert(nmm);
+ record_for_optimizer(nmm);
+ }
+
+ // Phase 4: Update the inputs of non-instance memory Phis and the Memory input of memnodes
+ //
+ // First update the inputs of any non-instance Phi's from
+ // which we split out an instance Phi. Note we don't have
+ // to recursively process Phi's encountered on the input memory
+ // chains as is done in split_memory_phi() since they will
+ // also be processed here.
+ while (orig_phis.length() != 0) {
+ PhiNode *phi = orig_phis.pop();
+ int alias_idx = _compile->get_alias_index(phi->adr_type());
+ igvn->hash_delete(phi);
+ for (uint i = 1; i < phi->req(); i++) {
+ Node *mem = phi->in(i);
+ Node *new_mem = find_mem(mem, alias_idx, igvn);
+ if (mem != new_mem) {
+ phi->set_req(i, new_mem);
+ }
+ }
+ igvn->hash_insert(phi);
+ record_for_optimizer(phi);
+ }
+
+ // Update the memory inputs of MemNodes with the value we computed
+ // in Phase 2.
+ for (int i = 0; i < _nodes->length(); i++) {
+ Node *nmem = get_map(i);
+ if (nmem != NULL) {
+ Node *n = _nodes->at(i)._node;
+ if (n != NULL && n->is_Mem()) {
+ igvn->hash_delete(n);
+ n->set_req(MemNode::Memory, nmem);
+ igvn->hash_insert(n);
+ record_for_optimizer(n);
+ }
+ }
+ }
+}
+
+void ConnectionGraph::compute_escape() {
+ GrowableArray<int> worklist;
+ GrowableArray<Node *> alloc_worklist;
+ VectorSet visited(Thread::current()->resource_area());
+ PhaseGVN *igvn = _compile->initial_gvn();
+
+ // process Phi nodes from the deferred list; they may not have had all of their inputs processed yet
+ while(_deferred.size() > 0) {
+ Node * n = _deferred.pop();
+ PhiNode * phi = n->as_Phi();
+
+ process_phi_escape(phi, igvn);
+ }
+
+ VectorSet ptset(Thread::current()->resource_area());
+
+ // remove deferred edges from the graph and collect
+ // information we will need for type splitting
+ for (uint ni = 0; ni < (uint)_nodes->length(); ni++) {
+ PointsToNode * ptn = _nodes->adr_at(ni);
+ PointsToNode::NodeType nt = ptn->node_type();
+
+ if (nt == PointsToNode::UnknownType) {
+ continue; // not a node we are interested in
+ }
+ Node *n = ptn->_node;
+ if (nt == PointsToNode::LocalVar || nt == PointsToNode::Field) {
+ remove_deferred(ni);
+ if (n->is_AddP()) {
+ // if this AddP computes an address which may point to more than one
+ // object, nothing the address points to can be a unique type.
+ Node *base = n->in(AddPNode::Base);
+ ptset.Clear();
+ PointsTo(ptset, base, igvn);
+ if (ptset.Size() > 1) {
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ PointsToNode *ptaddr = _nodes->adr_at(j.elem);
+ ptaddr->_unique_type = false;
+ }
+ }
+ }
+ } else if (n->is_Call()) {
+ // initialize _escape_state of calls to GlobalEscape
+ n->as_Call()->_escape_state = PointsToNode::GlobalEscape;
+ // push the call on alloc_worklist (allocations are calls)
+ // for processing by split_unique_types()
+ alloc_worklist.push(n);
+ }
+ }
+ // push all GlobalEscape nodes on the worklist
+ for (uint nj = 0; nj < (uint)_nodes->length(); nj++) {
+ if (_nodes->at(nj).escape_state() == PointsToNode::GlobalEscape) {
+ worklist.append(nj);
+ }
+ }
+ // mark all nodes reachable from GlobalEscape nodes
+ while(worklist.length() > 0) {
+ PointsToNode n = _nodes->at(worklist.pop());
+ for (uint ei = 0; ei < n.edge_count(); ei++) {
+ uint npi = n.edge_target(ei);
+ PointsToNode *np = ptnode_adr(npi);
+ if (np->escape_state() != PointsToNode::GlobalEscape) {
+ np->set_escape_state(PointsToNode::GlobalEscape);
+ worklist.append_if_missing(npi);
+ }
+ }
+ }
+
+ // push all ArgEscape nodes on the worklist
+ for (uint nk = 0; nk < (uint)_nodes->length(); nk++) {
+ if (_nodes->at(nk).escape_state() == PointsToNode::ArgEscape)
+ worklist.push(nk);
+ }
+ // mark all nodes reachable from ArgEscape nodes
+ while(worklist.length() > 0) {
+ PointsToNode n = _nodes->at(worklist.pop());
+
+ for (uint ei = 0; ei < n.edge_count(); ei++) {
+ uint npi = n.edge_target(ei);
+ PointsToNode *np = ptnode_adr(npi);
+ if (np->escape_state() != PointsToNode::ArgEscape) {
+ np->set_escape_state(PointsToNode::ArgEscape);
+ worklist.append_if_missing(npi);
+ }
+ }
+ }
+ _collecting = false;
+
+ // Now use the escape information to create unique types for
+ // unescaped objects
+ split_unique_types(alloc_worklist);
+}
+
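+// Strip CastPP/CheckCastPP wrappers; for example, skip_casts applied to
+// CheckCastPP(CastPP(p)) returns the underlying node p.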
+Node * ConnectionGraph::skip_casts(Node *n) {
+ while(n->Opcode() == Op_CastPP || n->Opcode() == Op_CheckCastPP) {
+ n = n->in(1);
+ }
+ return n;
+}
+
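+// Add PointsTo or Deferred edges to "phi" for any inputs that have appeared
+// since the last time the Phi was processed; _inputs_processed guards against
+// redundant and recursive reprocessing.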
+void ConnectionGraph::process_phi_escape(PhiNode *phi, PhaseTransform *phase) {
+
+ if (phi->type()->isa_oopptr() == NULL)
+ return; // nothing to do if not an oop
+
+ PointsToNode *ptadr = ptnode_adr(phi->_idx);
+ int incount = phi->req();
+ int non_null_inputs = 0;
+
+ for (int i = 1; i < incount ; i++) {
+ if (phi->in(i) != NULL)
+ non_null_inputs++;
+ }
+ if (non_null_inputs == ptadr->_inputs_processed)
+ return; // no new inputs since the last time this node was processed,
+ // the current information is valid
+
+ ptadr->_inputs_processed = non_null_inputs; // prevent recursive processing of this node
+ for (int j = 1; j < incount ; j++) {
+ Node * n = phi->in(j);
+ if (n == NULL)
+ continue; // ignore NULL
+ n = skip_casts(n);
+ if (n->is_top() || n == phi)
+ continue; // ignore top or inputs which go back to this node
+ int nopc = n->Opcode();
+ PointsToNode npt = _nodes->at(n->_idx);
+ if (_nodes->at(n->_idx).node_type() == PointsToNode::JavaObject) {
+ add_pointsto_edge(phi->_idx, n->_idx);
+ } else {
+ add_deferred_edge(phi->_idx, n->_idx);
+ }
+ }
+}
+
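+// Compute the escape state of the oop arguments of "call": arguments to
+// allocation and locking nodes do not escape; for a static Java call the
+// callee's BCEscapeAnalyzer decides per argument; any other call makes
+// everything its oop arguments point to GlobalEscape.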
+void ConnectionGraph::process_call_arguments(CallNode *call, PhaseTransform *phase) {
+
+ _processed.set(call->_idx);
+ switch (call->Opcode()) {
+
+ // arguments to allocation and locking don't escape
+ case Op_Allocate:
+ case Op_AllocateArray:
+ case Op_Lock:
+ case Op_Unlock:
+ break;
+
+ case Op_CallStaticJava:
+ // For a static call, we know exactly what method is being called.
+ // Use bytecode estimator to record the call's escape effects
+ {
+ ciMethod *meth = call->as_CallJava()->method();
+ if (meth != NULL) {
+ const TypeTuple * d = call->tf()->domain();
+ BCEscapeAnalyzer call_analyzer(meth);
+ VectorSet ptset(Thread::current()->resource_area());
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+ int k = i - TypeFunc::Parms;
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+
+ if (!call_analyzer.is_arg_stack(k)) {
+ // The argument globally escapes; mark everything it could point to
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+
+ set_escape_state(pt, PointsToNode::GlobalEscape);
+ }
+ } else if (!call_analyzer.is_arg_local(k)) {
+ // The argument itself doesn't escape, but any fields might
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+ add_edge_from_fields(pt, _phantom_object, Type::OffsetBot);
+ }
+ }
+ }
+ }
+ call_analyzer.copy_dependencies(C()->dependencies());
+ break;
+ }
+ // fall-through if not a Java method
+ }
+
+ default:
+ // Some other type of call, assume the worst case: all arguments
+ // globally escape.
+ {
+ // adjust escape state for outgoing arguments
+ const TypeTuple * d = call->tf()->domain();
+ VectorSet ptset(Thread::current()->resource_area());
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+ ptset.Clear();
+ PointsTo(ptset, arg, phase);
+ for( VectorSetI j(&ptset); j.test(); ++j ) {
+ uint pt = j.elem;
+
+ set_escape_state(pt, PointsToNode::GlobalEscape);
+ }
+ }
+ }
+ }
+ }
+}
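+
+// Model the value returned by "call" at projection "resproj": an Allocate
+// becomes a new JavaObject (GlobalEscape only if the klass is finalizable or
+// a Thread subclass); for a static Java call the callee's BCEscapeAnalyzer
+// decides whether the result is one of the arguments; otherwise a returned
+// pointer is assumed to escape globally.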
+void ConnectionGraph::process_call_result(ProjNode *resproj, PhaseTransform *phase) {
+ CallNode *call = resproj->in(0)->as_Call();
+
+ PointsToNode *ptadr = ptnode_adr(resproj->_idx);
+
+ ptadr->_node = resproj;
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ set_escape_state(resproj->_idx, PointsToNode::UnknownEscape);
+ _processed.set(resproj->_idx);
+
+ switch (call->Opcode()) {
+ case Op_Allocate:
+ {
+ Node *k = call->in(AllocateNode::KlassNode);
+ const TypeKlassPtr *kt;
+ if (k->Opcode() == Op_LoadKlass) {
+ kt = k->as_Load()->type()->isa_klassptr();
+ } else {
+ kt = k->as_Type()->type()->isa_klassptr();
+ }
+ assert(kt != NULL, "TypeKlassPtr required.");
+ ciKlass* cik = kt->klass();
+ ciInstanceKlass* ciik = cik->as_instance_klass();
+
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ if (cik->is_subclass_of(_compile->env()->Thread_klass()) || ciik->has_finalizer()) {
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ } else {
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ add_pointsto_edge(resproj->_idx, call->_idx);
+ }
+ _processed.set(call->_idx);
+ break;
+ }
+
+ case Op_AllocateArray:
+ {
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ _processed.set(call->_idx);
+ add_pointsto_edge(resproj->_idx, call->_idx);
+ break;
+ }
+
+ case Op_Lock:
+ case Op_Unlock:
+ break;
+
+ case Op_CallStaticJava:
+ // For a static call, we know exactly what method is being called.
+ // Use bytecode estimator to record whether the call's return value escapes
+ {
+ const TypeTuple *r = call->tf()->range();
+ const Type* ret_type = NULL;
+
+ if (r->cnt() > TypeFunc::Parms)
+ ret_type = r->field_at(TypeFunc::Parms);
+
+ // Note: we use isa_ptr() instead of isa_oopptr() here because the
+ // _multianewarray functions return a TypeRawPtr.
+ if (ret_type == NULL || ret_type->isa_ptr() == NULL)
+ break; // doesn't return a pointer type
+
+ ciMethod *meth = call->as_CallJava()->method();
+ if (meth == NULL) {
+ // not a Java method, assume global escape
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ } else {
+ BCEscapeAnalyzer call_analyzer(meth);
+ VectorSet ptset(Thread::current()->resource_area());
+
+ if (call_analyzer.is_return_local() && resproj != NULL) {
+ // determine whether any arguments are returned
+ const TypeTuple * d = call->tf()->domain();
+ set_escape_state(call->_idx, PointsToNode::NoEscape);
+ for (uint i = TypeFunc::Parms; i < d->cnt(); i++) {
+ const Type* at = d->field_at(i);
+
+ if (at->isa_oopptr() != NULL) {
+ Node *arg = skip_casts(call->in(i));
+
+ if (call_analyzer.is_arg_returned(i - TypeFunc::Parms)) {
+ PointsToNode *arg_esp = _nodes->adr_at(arg->_idx);
+ if (arg_esp->node_type() == PointsToNode::JavaObject)
+ add_pointsto_edge(resproj->_idx, arg->_idx);
+ else
+ add_deferred_edge(resproj->_idx, arg->_idx);
+ arg_esp->_hidden_alias = true;
+ }
+ }
+ }
+ } else {
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ }
+ call_analyzer.copy_dependencies(C()->dependencies());
+ }
+ break;
+ }
+
+ default:
+ // Some other type of call, assume the worst case that the
+ // returned value, if any, globally escapes.
+ {
+ const TypeTuple *r = call->tf()->range();
+
+ if (r->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = r->field_at(TypeFunc::Parms);
+
+ // Note: we use isa_ptr() instead of isa_oopptr() here because the
+ // _multianewarray functions return a TypeRawPtr.
+ if (ret_type->isa_ptr() != NULL) {
+ PointsToNode *ptadr = ptnode_adr(call->_idx);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(call->_idx, PointsToNode::GlobalEscape);
+ if (resproj != NULL)
+ add_pointsto_edge(resproj->_idx, _phantom_object);
+ }
+ }
+ }
+ }
+}
+
+void ConnectionGraph::record_for_escape_analysis(Node *n) {
+ if (_collecting) {
+ if (n->is_Phi()) {
+ PhiNode *phi = n->as_Phi();
+ const Type *pt = phi->type();
+ if ((pt->isa_oopptr() != NULL) || pt == TypePtr::NULL_PTR) {
+ PointsToNode *ptn = ptnode_adr(phi->_idx);
+ ptn->set_node_type(PointsToNode::LocalVar);
+ ptn->_node = n;
+ _deferred.push(n);
+ }
+ }
+ }
+}
+
+void ConnectionGraph::record_escape_work(Node *n, PhaseTransform *phase) {
+
+ int opc = n->Opcode();
+ PointsToNode *ptadr = ptnode_adr(n->_idx);
+
+ if (_processed.test(n->_idx))
+ return;
+
+ ptadr->_node = n;
+ if (n->is_Call()) {
+ CallNode *call = n->as_Call();
+ process_call_arguments(call, phase);
+ return;
+ }
+
+ switch (opc) {
+ case Op_AddP:
+ {
+ Node *base = skip_casts(n->in(AddPNode::Base));
+ ptadr->set_node_type(PointsToNode::Field);
+
+ // create a field edge to this node from everything adr could point to
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, base, phase);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_field_edge(pt, n->_idx, type_to_offset(phase->type(n)));
+ }
+ break;
+ }
+ case Op_Parm:
+ {
+ ProjNode *nproj = n->as_Proj();
+ uint con = nproj->_con;
+ if (con < TypeFunc::Parms)
+ return;
+ const Type *t = nproj->in(0)->as_Start()->_domain->field_at(con);
+ if (t->isa_ptr() == NULL)
+ return;
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ if (t->isa_oopptr() != NULL) {
+ set_escape_state(n->_idx, PointsToNode::ArgEscape);
+ } else {
+ // this must be the incoming state of an OSR compile; we have to assume anything
+ // passed in globally escapes
+ assert(_compile->is_osr_compilation(), "bad argument type for non-osr compilation");
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ }
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_Phi:
+ {
+ PhiNode *phi = n->as_Phi();
+ if (phi->type()->isa_oopptr() == NULL)
+ return; // nothing to do if not an oop
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ process_phi_escape(phi, phase);
+ break;
+ }
+ case Op_CreateEx:
+ {
+ // assume that all exception objects globally escape
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_ConP:
+ {
+ const Type *t = phase->type(n);
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ // assume all pointer constants globally escape except for null
+ if (t == TypePtr::NULL_PTR)
+ set_escape_state(n->_idx, PointsToNode::NoEscape);
+ else
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_LoadKlass:
+ {
+ ptadr->set_node_type(PointsToNode::JavaObject);
+ set_escape_state(n->_idx, PointsToNode::GlobalEscape);
+ _processed.set(n->_idx);
+ break;
+ }
+ case Op_LoadP:
+ {
+ const Type *t = phase->type(n);
+ if (!t->isa_oopptr())
+ return;
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ set_escape_state(n->_idx, PointsToNode::UnknownEscape);
+
+ Node *adr = skip_casts(n->in(MemNode::Address));
+ const Type *adr_type = phase->type(adr);
+ Node *adr_base = skip_casts((adr->Opcode() == Op_AddP) ? adr->in(AddPNode::Base) : adr);
+
+ // For everything "adr" could point to, create a deferred edge from
+ // this node to each field with the same offset as "adr_type"
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, adr_base, phase);
+ // If ptset is empty, then this value must have been set outside
+ // this method, so we add the phantom node
+ if (ptset.Size() == 0)
+ ptset.set(_phantom_object);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_deferred_edge_to_fields(n->_idx, pt, type_to_offset(adr_type));
+ }
+ break;
+ }
+ case Op_StoreP:
+ case Op_StorePConditional:
+ case Op_CompareAndSwapP:
+ {
+ Node *adr = n->in(MemNode::Address);
+ Node *val = skip_casts(n->in(MemNode::ValueIn));
+ const Type *adr_type = phase->type(adr);
+ if (!adr_type->isa_oopptr())
+ return;
+
+ assert(adr->Opcode() == Op_AddP, "expecting an AddP");
+ Node *adr_base = adr->in(AddPNode::Base);
+
+ // For everything "adr_base" could point to, create a deferred edge to "val" from each field
+ // with the same offset as "adr_type"
+ VectorSet ptset(Thread::current()->resource_area());
+ PointsTo(ptset, adr_base, phase);
+ for( VectorSetI i(&ptset); i.test(); ++i ) {
+ uint pt = i.elem;
+ add_edge_from_fields(pt, val->_idx, type_to_offset(adr_type));
+ }
+ break;
+ }
+ case Op_Proj:
+ {
+ ProjNode *nproj = n->as_Proj();
+ Node *n0 = nproj->in(0);
+ // we are only interested in the result projection from a call
+ if (nproj->_con == TypeFunc::Parms && n0->is_Call() ) {
+ process_call_result(nproj, phase);
+ }
+
+ break;
+ }
+ case Op_CastPP:
+ case Op_CheckCastPP:
+ {
+ ptadr->set_node_type(PointsToNode::LocalVar);
+ int ti = n->in(1)->_idx;
+ if (_nodes->at(ti).node_type() == PointsToNode::JavaObject) {
+ add_pointsto_edge(n->_idx, ti);
+ } else {
+ add_deferred_edge(n->_idx, ti);
+ }
+ break;
+ }
+ default:
+ ;
+ // nothing to do
+ }
+}
+
+void ConnectionGraph::record_escape(Node *n, PhaseTransform *phase) {
+ if (_collecting)
+ record_escape_work(n, phase);
+}
+
+#ifndef PRODUCT
+void ConnectionGraph::dump() {
+ PhaseGVN *igvn = _compile->initial_gvn();
+ bool first = true;
+
+ for (uint ni = 0; ni < (uint)_nodes->length(); ni++) {
+ PointsToNode *esp = _nodes->adr_at(ni);
+ if (esp->node_type() == PointsToNode::UnknownType || esp->_node == NULL)
+ continue;
+ PointsToNode::EscapeState es = escape_state(esp->_node, igvn);
+ if (es == PointsToNode::NoEscape || (Verbose &&
+ (es != PointsToNode::UnknownEscape || esp->edge_count() != 0))) {
+ // don't print null pointer node which almost every method has
+ if (esp->_node->Opcode() != Op_ConP || igvn->type(esp->_node) != TypePtr::NULL_PTR) {
+ if (first) {
+ tty->print("======== Connection graph for ");
+ C()->method()->print_short_name();
+ tty->cr();
+ first = false;
+ }
+ tty->print("%4d ", ni);
+ esp->dump();
+ }
+ }
+ }
+}
+#endif
diff --git a/src/share/vm/opto/escape.hpp b/src/share/vm/opto/escape.hpp
new file mode 100644
index 000000000..3cd879257
--- /dev/null
+++ b/src/share/vm/opto/escape.hpp
@@ -0,0 +1,319 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//
+// Adaptation for C2 of the escape analysis algorithm described in:
+//
+// [Choi99] Jong-Deok Choi, Manish Gupta, Mauricio Serrano, Vugranam C. Sreedhar,
+// Sam Midkiff, "Escape Analysis for Java", Proceedings of the ACM SIGPLAN
+// OOPSLA Conference, November 1, 1999
+//
+// The flow-insensitive analysis described in the paper has been implemented.
+//
+// The analysis requires construction of a "connection graph" (CG) for the method being
+// analyzed. The nodes of the connection graph are:
+//
+// - Java objects (JO)
+// - Local variables (LV)
+// - Fields of an object (OF), these also include array elements
+//
+// The CG contains 3 types of edges:
+//
+// - PointsTo (-P>) {LV,OF} to JO
+// - Deferred (-D>) from {LV, OF} to {LV, OF}
+// - Field (-F>) from JO to OF
+//
+// The following utility function is used by the algorithm:
+//
+// PointsTo(n) - n is any CG node; it returns the set of JO that n could
+// point to.
+//
+// The algorithm describes how to construct the connection graph in the following 4 cases:
+//
+// Case Edges Created
+//
+// (1) p = new T() LV -P> JO
+// (2) p = q LV -D> LV
+// (3) p.f = q JO -F> OF, OF -D> LV
+// (4) p = q.f JO -F> OF, LV -D> OF
+//
+// In all these cases, p and q are local variables. For static field references, we can
+// construct a local variable containing a reference to the static memory.
+//
+// C2 does not have local variables. However for the purposes of constructing
+// the connection graph, the following IR nodes are treated as local variables:
+// Phi (pointer values)
+// LoadP
+// Proj (value returned from callnodes including allocations)
+// CheckCastPP
+//
+// LoadP, Proj and CheckCastPP behave like variables that are assigned only once. Only
+// a Phi can have multiple assignments. Each input to a Phi is treated
+// as an assignment to it.
+//
+// The following node types are JavaObject:
+//
+// top()
+// Allocate
+// AllocateArray
+// Parm (for incoming arguments)
+// CreateEx
+// ConP
+// LoadKlass
+//
+// AddP nodes are fields.
+//
+// After building the graph, a pass is made over the nodes, deleting deferred
+// edges and copying the edges from the target of each deferred edge to the
+// source. This results in a graph with no deferred edges, only:
+//
+// LV -P> JO
+// OF -P> JO
+// JO -F> OF
+//
+// Then, for each node which is GlobalEscape, anything it could point to
+// is marked GlobalEscape. Finally, for any node marked ArgEscape, anything
+// it could point to is marked ArgEscape.
+//
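+// As an informal example, using the notation above, the fragment
+//
+//    p = new T();   // case (1):  LV_p -P> JO_1
+//    q = p;         // case (2):  LV_q -D> LV_p
+//    q.f = r;       // case (3):  JO_1 -F> OF_f,  OF_f -D> LV_r
+//    s = q.f;       // case (4):  LV_s -D> OF_f
+//
+// produces a graph in which, after the deferred edges are removed, both OF_f
+// and LV_s point to whatever JavaObject r refers to.
+//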
+
+class Compile;
+class Node;
+class CallNode;
+class PhiNode;
+class PhaseTransform;
+class Type;
+class TypePtr;
+class VectorSet;
+
+class PointsToNode {
+friend class ConnectionGraph;
+public:
+ typedef enum {
+ UnknownType = 0,
+ JavaObject = 1,
+ LocalVar = 2,
+ Field = 3
+ } NodeType;
+
+ typedef enum {
+ UnknownEscape = 0,
+ NoEscape = 1,
+ ArgEscape = 2,
+ GlobalEscape = 3
+ } EscapeState;
+
+ typedef enum {
+ UnknownEdge = 0,
+ PointsToEdge = 1,
+ DeferredEdge = 2,
+ FieldEdge = 3
+ } EdgeType;
+
+private:
+ enum {
+ EdgeMask = 3,
+ EdgeShift = 2,
+
+ INITIAL_EDGE_COUNT = 4
+ };
+
+ NodeType _type;
+ EscapeState _escape;
+ GrowableArray<uint>* _edges; // outgoing edges
+ int _offset; // for fields
+
+ bool _unique_type; // For allocated objects, this node may be a unique type
+public:
+ Node* _node; // Ideal node corresponding to this PointsTo node
+ int _inputs_processed; // the number of Phi inputs that have been processed so far
+ bool _hidden_alias; // this node is an argument to a function which may return it
+ // creating a hidden alias
+
+
+ PointsToNode(): _offset(-1), _type(UnknownType), _escape(UnknownEscape), _edges(NULL), _node(NULL), _inputs_processed(0), _hidden_alias(false), _unique_type(true) {}
+
+ EscapeState escape_state() const { return _escape; }
+ NodeType node_type() const { return _type;}
+ int offset() { return _offset;}
+
+ void set_offset(int offs) { _offset = offs;}
+ void set_escape_state(EscapeState state) { _escape = state; }
+ void set_node_type(NodeType ntype) {
+ assert(_type == UnknownType || _type == ntype, "Can't change node type");
+ _type = ntype;
+ }
+
+ // count of outgoing edges
+ uint edge_count() const { return (_edges == NULL) ? 0 : _edges->length(); }
+ // node index of target of outgoing edge "e"
+ uint edge_target(uint e) const;
+ // type of outgoing edge "e"
+ EdgeType edge_type(uint e) const;
+ // add an edge of the specified type pointing to the specified target
+ void add_edge(uint targIdx, EdgeType et);
+ // remove an edge of the specified type pointing to the specified target
+ void remove_edge(uint targIdx, EdgeType et);
+#ifndef PRODUCT
+ void dump() const;
+#endif
+
+};
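+
+// The EdgeMask/EdgeShift constants above suggest that each _edges entry packs
+// the target node index together with the 2-bit edge type; a sketch of the
+// assumed encoding (see add_edge/edge_target/edge_type):
+//
+//   uint entry  = (targIdx << EdgeShift) | (uint)et;   // stored by add_edge
+//   uint target = entry >> EdgeShift;                  // edge_target(e)
+//   EdgeType t  = (EdgeType)(entry & EdgeMask);        // edge_type(e)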
+
+class ConnectionGraph: public ResourceObj {
+private:
+ enum {
+ INITIAL_NODE_COUNT = 100 // initial size of _nodes array
+ };
+
+
+ GrowableArray<PointsToNode>* _nodes; // connection graph nodes, indexed by ideal
+ // node index
+ Unique_Node_List _deferred; // Phi's to be processed after parsing
+ VectorSet _processed; // records which nodes have been processed
+ bool _collecting; // indicates whether escape information is
+ // still being collected. If false, no new
+ // nodes will be processed
+ uint _phantom_object; // index of globally escaping object that
+ // pointer values loaded from a field which
+ // has not been set are assumed to point to
+ Compile * _compile; // Compile object for current compilation
+
+ // address of an element in _nodes. Used when the element is to be modified
+ PointsToNode *ptnode_adr(uint idx) {
+ if ((uint)_nodes->length() <= idx) {
+ // expand _nodes array
+ PointsToNode dummy = _nodes->at_grow(idx);
+ }
+ return _nodes->adr_at(idx);
+ }
+
+ // offset of a field reference
+ int type_to_offset(const Type *t);
+
+ // compute the escape state for arguments to a call
+ void process_call_arguments(CallNode *call, PhaseTransform *phase);
+
+ // compute the escape state for the return value of a call
+ void process_call_result(ProjNode *resproj, PhaseTransform *phase);
+
+ // compute the escape state of a Phi. This may be called multiple
+ // times as new inputs are added to the Phi.
+ void process_phi_escape(PhiNode *phi, PhaseTransform *phase);
+
+ // compute the escape state of an ideal node.
+ void record_escape_work(Node *n, PhaseTransform *phase);
+
+ // walk the connection graph starting at the node corresponding to "n" and
+ // add the index of everything it could point to, to "ptset". This may cause
+ // Phi's encountered to get (re)processed (which requires "phase".)
+ void PointsTo(VectorSet &ptset, Node * n, PhaseTransform *phase);
+
+ // Edge manipulation. The "from_i" and "to_i" arguments are the
+ // node indices of the source and destination of the edge
+ void add_pointsto_edge(uint from_i, uint to_i);
+ void add_deferred_edge(uint from_i, uint to_i);
+ void add_field_edge(uint from_i, uint to_i, int offs);
+
+
+ // Add an edge to the node given by "to_i" from any field of adr_i whose offset
+ // matches "offset". A deferred edge is added if to_i is a LocalVar, and
+ // a pointsto edge is added if it is a JavaObject.
+ void add_edge_from_fields(uint adr, uint to_i, int offs);
+
+ // Add a deferred edge from the node given by "from_i" to any field of adr_i whose offset
+ // matches "offset".
+ void add_deferred_edge_to_fields(uint from_i, uint adr, int offs);
+
+
+ // Remove outgoing deferred edges from the node referenced by "ni".
+ // Any outgoing edges from the target of the deferred edge are copied
+ // to "ni".
+ void remove_deferred(uint ni);
+
+ Node_Array _node_map; // used for bookkeeping during type splitting
+ // Used for the following purposes:
+ // Memory Phi - most recent unique Phi split out
+ // from this Phi
+ // MemNode - new memory input for this node
+ // CheckCastPP - allocation that this is a cast of
+ // allocation - CheckCastPP of the allocation
+ void split_AddP(Node *addp, Node *base, PhaseGVN *igvn);
+ PhiNode *create_split_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn, bool &new_created);
+ PhiNode *split_memory_phi(PhiNode *orig_phi, int alias_idx, GrowableArray<PhiNode *> &orig_phi_worklist, PhaseGVN *igvn);
+ Node *find_mem(Node *mem, int alias_idx, PhaseGVN *igvn);
+ // Propagate unique types created for unescaped allocated objects
+ // through the graph
+ void split_unique_types(GrowableArray<Node *> &alloc_worklist);
+
+ // manage entries in _node_map
+ void set_map(int idx, Node *n) { _node_map.map(idx, n); }
+ void set_map_phi(int idx, PhiNode *p) { _node_map.map(idx, (Node *) p); }
+ Node *get_map(int idx) { return _node_map[idx]; }
+ PhiNode *get_map_phi(int idx) {
+ Node *phi = _node_map[idx];
+ return (phi == NULL) ? NULL : phi->as_Phi();
+ }
+
+ // Notify optimizer that a node has been modified
+ // Note: This assumes that escape analysis is run before
+ // PhaseIterGVN creation
+ void record_for_optimizer(Node *n) {
+ _compile->record_for_igvn(n);
+ }
+
+ // Set the escape state of a node
+ void set_escape_state(uint ni, PointsToNode::EscapeState es);
+
+ // bypass any casts and return the node they refer to
+ Node * skip_casts(Node *n);
+
+ // Get Compile object for current compilation.
+ Compile *C() const { return _compile; }
+
+public:
+ ConnectionGraph(Compile *C);
+
+ // record a Phi for later processing.
+ void record_for_escape_analysis(Node *n);
+
+ // process a node and fill in its connection graph node
+ void record_escape(Node *n, PhaseTransform *phase);
+
+ // All nodes have been recorded, compute the escape information
+ void compute_escape();
+
+ // escape state of a node
+ PointsToNode::EscapeState escape_state(Node *n, PhaseTransform *phase);
+
+ bool hidden_alias(Node *n) {
+ if (_collecting)
+ return true;
+ PointsToNode ptn = _nodes->at_grow(n->_idx);
+ return (ptn.escape_state() != PointsToNode::NoEscape) || ptn._hidden_alias;
+ }
+
+#ifndef PRODUCT
+ void dump();
+#endif
+};
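+
+// Typical use, sketched from the public interface above (the actual driver
+// lives elsewhere in the compiler): construct a ConnectionGraph for the
+// current Compile, feed it ideal nodes through record_escape() and Phis
+// through record_for_escape_analysis() while collecting, call
+// compute_escape() once, and afterwards query escape_state(n, phase) or
+// hidden_alias(n) when deciding whether an optimization is safe.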
diff --git a/src/share/vm/opto/gcm.cpp b/src/share/vm/opto/gcm.cpp
new file mode 100644
index 000000000..88af191a1
--- /dev/null
+++ b/src/share/vm/opto/gcm.cpp
@@ -0,0 +1,1767 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_gcm.cpp.incl"
+
+//----------------------------schedule_node_into_block-------------------------
+// Insert node n into block b. Look for projections of n and make sure they
+// are in b also.
+void PhaseCFG::schedule_node_into_block( Node *n, Block *b ) {
+ // Set basic block of n, Add n to b,
+ _bbs.map(n->_idx, b);
+ b->add_inst(n);
+
+ // After Matching, nearly any old Node may have projections trailing it.
+ // These are usually machine-dependent flags. In any case, they might
+ // float to another block below this one. Move them up.
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i);
+ if (use->is_Proj()) {
+ Block* buse = _bbs[use->_idx];
+ if (buse != b) { // In wrong block?
+ if (buse != NULL)
+ buse->find_remove(use); // Remove from wrong block
+ _bbs.map(use->_idx, b); // Re-insert in this block
+ b->add_inst(use);
+ }
+ }
+ }
+}
+
+
+//------------------------------schedule_pinned_nodes--------------------------
+// Set the basic block for Nodes pinned into blocks
+void PhaseCFG::schedule_pinned_nodes( VectorSet &visited ) {
+ // Allocate node stack of size C->unique()+8 to avoid frequent realloc
+ GrowableArray <Node *> spstack(C->unique()+8);
+ spstack.push(_root);
+ while ( spstack.is_nonempty() ) {
+ Node *n = spstack.pop();
+ if( !visited.test_set(n->_idx) ) { // Test node and flag it as visited
+ if( n->pinned() && !_bbs.lookup(n->_idx) ) { // Pinned? Nail it down!
+ Node *input = n->in(0);
+ assert( input, "pinned Node must have Control" );
+ while( !input->is_block_start() )
+ input = input->in(0);
+ Block *b = _bbs[input->_idx]; // Basic block of controlling input
+ schedule_node_into_block(n, b);
+ }
+ for( int i = n->req() - 1; i >= 0; --i ) { // For all inputs
+ if( n->in(i) != NULL )
+ spstack.push(n->in(i));
+ }
+ }
+ }
+}
+
+#ifdef ASSERT
+// Assert that new input b2 is dominated by all previous inputs.
+// Check this by seeing that it is dominated by b1, the deepest
+// input observed until b2.
+static void assert_dom(Block* b1, Block* b2, Node* n, Block_Array &bbs) {
+ if (b1 == NULL) return;
+ assert(b1->_dom_depth < b2->_dom_depth, "sanity");
+ Block* tmp = b2;
+ while (tmp != b1 && tmp != NULL) {
+ tmp = tmp->_idom;
+ }
+ if (tmp != b1) {
+ // Detected an unschedulable graph. Print some nice stuff and die.
+ tty->print_cr("!!! Unschedulable graph !!!");
+ for (uint j=0; j<n->len(); j++) { // For all inputs
+ Node* inn = n->in(j); // Get input
+ if (inn == NULL) continue; // Ignore NULL, missing inputs
+ Block* inb = bbs[inn->_idx];
+ tty->print("B%d idom=B%d depth=%2d ",inb->_pre_order,
+ inb->_idom ? inb->_idom->_pre_order : 0, inb->_dom_depth);
+ inn->dump();
+ }
+ tty->print("Failing node: ");
+ n->dump();
+ assert(false, "unscheduable graph");
+ }
+}
+#endif
+
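+// Return the block of the input that lies deepest in the dominator tree.
+// Since every input's block must dominate the block chosen for the node being
+// scheduled, this is the earliest block in which that node can legally be placed.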
+static Block* find_deepest_input(Node* n, Block_Array &bbs) {
+ // Find the last input dominated by all other inputs.
+ Block* deepb = NULL; // Deepest block so far
+ int deepb_dom_depth = 0;
+ for (uint k = 0; k < n->len(); k++) { // For all inputs
+ Node* inn = n->in(k); // Get input
+ if (inn == NULL) continue; // Ignore NULL, missing inputs
+ Block* inb = bbs[inn->_idx];
+ assert(inb != NULL, "must already have scheduled this input");
+ if (deepb_dom_depth < (int) inb->_dom_depth) {
+ // The new inb must be dominated by the previous deepb.
+ // The various inputs must be linearly ordered in the dom
+ // tree, or else there will not be a unique deepest block.
+ DEBUG_ONLY(assert_dom(deepb, inb, n, bbs));
+ deepb = inb; // Save deepest block
+ deepb_dom_depth = deepb->_dom_depth;
+ }
+ }
+ assert(deepb != NULL, "must be at least one input to n");
+ return deepb;
+}
+
+
+//------------------------------schedule_early---------------------------------
+// Find the earliest Block any instruction can be placed in. Some instructions
+// are pinned into Blocks. Unpinned instructions can appear in the last block in
+// which all their inputs occur.
+bool PhaseCFG::schedule_early(VectorSet &visited, Node_List &roots) {
+ // Allocate stack with enough space to avoid frequent realloc
+ Node_Stack nstack(roots.Size() + 8); // (unique >> 1) + 24 from Java2D stats
+ // roots.push(_root); _root will be processed among C->top() inputs
+ roots.push(C->top());
+ visited.set(C->top()->_idx);
+
+ while (roots.size() != 0) {
+ // Use local variables nstack_top_n & nstack_top_i to cache values
+ // on stack's top.
+ Node *nstack_top_n = roots.pop();
+ uint nstack_top_i = 0;
+//while_nstack_nonempty:
+ while (true) {
+ // Get parent node and next input's index from stack's top.
+ Node *n = nstack_top_n;
+ uint i = nstack_top_i;
+
+ if (i == 0) {
+ // Special control input processing.
+ // While I am here, go ahead and look for Nodes which are taking control
+ // from an is_block_proj Node. After I inserted RegionNodes to make proper
+ // blocks, the control at an is_block_proj more properly comes from the
+ // Region being controlled by the block_proj Node.
+ const Node *in0 = n->in(0);
+ if (in0 != NULL) { // Control-dependent?
+ const Node *p = in0->is_block_proj();
+ if (p != NULL && p != n) { // Control from a block projection?
+ // Find trailing Region
+ Block *pb = _bbs[in0->_idx]; // Block-projection already has basic block
+ uint j = 0;
+ if (pb->_num_succs != 1) { // More than 1 successor?
+ // Search for successor
+ uint max = pb->_nodes.size();
+ assert( max > 1, "" );
+ uint start = max - pb->_num_succs;
+ // Find which output path belongs to projection
+ for (j = start; j < max; j++) {
+ if( pb->_nodes[j] == in0 )
+ break;
+ }
+ assert( j < max, "must find" );
+ // Change control to match head of successor basic block
+ j -= start;
+ }
+ n->set_req(0, pb->_succs[j]->head());
+ }
+ } else { // n->in(0) == NULL
+ if (n->req() == 1) { // This guy is a constant with NO inputs?
+ n->set_req(0, _root);
+ }
+ }
+ }
+
+ // First, visit all inputs and force them to get a block. If an
+ // input is already in a block we quit following inputs (to avoid
+ // cycles). Instead we put that Node on a worklist to be handled
+ // later (since ITS inputs may not have a block yet).
+ bool done = true; // Assume all n's inputs will be processed
+ while (i < n->len()) { // For all inputs
+ Node *in = n->in(i); // Get input
+ ++i;
+ if (in == NULL) continue; // Ignore NULL, missing inputs
+ int is_visited = visited.test_set(in->_idx);
+ if (!_bbs.lookup(in->_idx)) { // Missing block selection?
+ if (is_visited) {
+ // assert( !visited.test(in->_idx), "did not schedule early" );
+ return false;
+ }
+ nstack.push(n, i); // Save parent node and next input's index.
+ nstack_top_n = in; // Process current input now.
+ nstack_top_i = 0;
+ done = false; // Not all n's inputs processed.
+ break; // continue while_nstack_nonempty;
+ } else if (!is_visited) { // Input not yet visited?
+ roots.push(in); // Visit this guy later, using worklist
+ }
+ }
+ if (done) {
+ // All of n's inputs have been processed, complete post-processing.
+
+ // Some instructions are pinned into a block. These include Region,
+ // Phi, Start, Return, and other control-dependent instructions and
+ // any projections which depend on them.
+ if (!n->pinned()) {
+ // Set earliest legal block.
+ _bbs.map(n->_idx, find_deepest_input(n, _bbs));
+ }
+
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist 'roots'.
+ break;
+ }
+ // Get saved parent node and next input's index.
+ nstack_top_n = nstack.node();
+ nstack_top_i = nstack.index();
+ nstack.pop();
+ } // if (done)
+ } // while (true)
+ } // while (roots.size() != 0)
+ return true;
+}
+
+//------------------------------dom_lca----------------------------------------
+// Find least common ancestor in dominator tree
+// LCA is a current notion of LCA, to be raised above 'this'.
+// As a convenient boundary condition, return 'this' if LCA is NULL.
+// Find the LCA of those two nodes.
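+// (Illustration: if 'this' sits at dom_depth 5 and LCA at dom_depth 3, the
+// first loop lifts 'this' two levels; the final loop then climbs both
+// pointers in lockstep until they meet at the common dominator.)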
+Block* Block::dom_lca(Block* LCA) {
+ if (LCA == NULL || LCA == this) return this;
+
+ Block* anc = this;
+ while (anc->_dom_depth > LCA->_dom_depth)
+ anc = anc->_idom; // Walk up till anc is as high as LCA
+
+ while (LCA->_dom_depth > anc->_dom_depth)
+ LCA = LCA->_idom; // Walk up till LCA is as high as anc
+
+ while (LCA != anc) { // Walk both up till they are the same
+ LCA = LCA->_idom;
+ anc = anc->_idom;
+ }
+
+ return LCA;
+}
+
+//--------------------------raise_LCA_above_use--------------------------------
+// We are placing a definition, and have been given a def->use edge.
+// The definition must dominate the use, so move the LCA upward in the
+// dominator tree to dominate the use. If the use is a phi, adjust
+// the LCA only with the phi input paths which actually use this def.
+static Block* raise_LCA_above_use(Block* LCA, Node* use, Node* def, Block_Array &bbs) {
+ Block* buse = bbs[use->_idx];
+ if (buse == NULL) return LCA; // Unused killing Projs have no use block
+ if (!use->is_Phi()) return buse->dom_lca(LCA);
+ uint pmax = use->req(); // Number of Phi inputs
+ // Why doesn't this loop just break after finding the matching input to
+ // the Phi? Well...it's like this. I do not have true def-use/use-def
+ // chains. Means I cannot distinguish, from the def-use direction, which
+ // of many use-defs lead from the same use to the same def. That is, this
+ // Phi might have several uses of the same def. Each use appears in a
+ // different predecessor block. But when I enter here, I cannot distinguish
+ // which use-def edge I should find the predecessor block for. So I find
+ // them all. Means I do a little extra work if a Phi uses the same value
+ // more than once.
+ for (uint j=1; j<pmax; j++) { // For all inputs
+ if (use->in(j) == def) { // Found matching input?
+ Block* pred = bbs[buse->pred(j)->_idx];
+ LCA = pred->dom_lca(LCA);
+ }
+ }
+ return LCA;
+}
+
+//----------------------------raise_LCA_above_marks----------------------------
+// Return a new LCA that dominates LCA and any of its marked predecessors.
+// Search all my parents up to 'early' (exclusive), looking for predecessors
+// which are marked with the given index. Return the LCA (in the dom tree)
+// of all marked blocks. If there are none marked, return the original
+// LCA.
+static Block* raise_LCA_above_marks(Block* LCA, node_idx_t mark,
+ Block* early, Block_Array &bbs) {
+ Block_List worklist;
+ worklist.push(LCA);
+ while (worklist.size() > 0) {
+ Block* mid = worklist.pop();
+ if (mid == early) continue; // stop searching here
+
+ // Test and set the visited bit.
+ if (mid->raise_LCA_visited() == mark) continue; // already visited
+ mid->set_raise_LCA_visited(mark);
+
+ // Don't process the current LCA, otherwise the search may terminate early
+ if (mid != LCA && mid->raise_LCA_mark() == mark) {
+ // Raise the LCA.
+ LCA = mid->dom_lca(LCA);
+ if (LCA == early) break; // stop searching everywhere
+ assert(early->dominates(LCA), "early is high enough");
+ // Resume searching at that point, skipping intermediate levels.
+ worklist.push(LCA);
+ } else {
+ // Keep searching through this block's predecessors.
+ for (uint j = 1, jmax = mid->num_preds(); j < jmax; j++) {
+ Block* mid_parent = bbs[ mid->pred(j)->_idx ];
+ worklist.push(mid_parent);
+ }
+ }
+ }
+ return LCA;
+}
+
+//--------------------------memory_early_block--------------------------------
+// This is a variation of find_deepest_input, the heart of schedule_early.
+// Find the "early" block for a load, if we considered only memory and
+// address inputs, that is, if other data inputs were ignored.
+//
+// Because a subset of edges are considered, the resulting block will
+// be earlier (at a shallower dom_depth) than the true schedule_early
+// point of the node. We compute this earlier block as a more permissive
+// site for anti-dependency insertion, but only if subsume_loads is enabled.
+static Block* memory_early_block(Node* load, Block* early, Block_Array &bbs) {
+ Node* base;
+ Node* index;
+ Node* store = load->in(MemNode::Memory);
+ load->as_Mach()->memory_inputs(base, index);
+
+ assert(base != NodeSentinel && index != NodeSentinel,
+ "unexpected base/index inputs");
+
+ Node* mem_inputs[4];
+ int mem_inputs_length = 0;
+ if (base != NULL) mem_inputs[mem_inputs_length++] = base;
+ if (index != NULL) mem_inputs[mem_inputs_length++] = index;
+ if (store != NULL) mem_inputs[mem_inputs_length++] = store;
+
+ // In the comparison below, add one to account for the control input,
+ // which may be null, but always takes up a spot in the in array.
+ if (mem_inputs_length + 1 < (int) load->req()) {
+ // This "load" has more inputs than just the memory, base and index inputs.
+ // For purposes of checking anti-dependences, we need to start
+ // from the early block of only the address portion of the instruction,
+ // and ignore other blocks that may have factored into the wider
+ // schedule_early calculation.
+ if (load->in(0) != NULL) mem_inputs[mem_inputs_length++] = load->in(0);
+
+ Block* deepb = NULL; // Deepest block so far
+ int deepb_dom_depth = 0;
+ for (int i = 0; i < mem_inputs_length; i++) {
+ Block* inb = bbs[mem_inputs[i]->_idx];
+ if (deepb_dom_depth < (int) inb->_dom_depth) {
+ // The new inb must be dominated by the previous deepb.
+ // The various inputs must be linearly ordered in the dom
+ // tree, or else there will not be a unique deepest block.
+ DEBUG_ONLY(assert_dom(deepb, inb, load, bbs));
+ deepb = inb; // Save deepest block
+ deepb_dom_depth = deepb->_dom_depth;
+ }
+ }
+ early = deepb;
+ }
+
+ return early;
+}
+
+//--------------------------insert_anti_dependences---------------------------
+// A load may need to witness memory that nearby stores can overwrite.
+// For each nearby store, either insert an "anti-dependence" edge
+// from the load to the store, or else move LCA upward to force the
+// load to (eventually) be scheduled in a block above the store.
+//
+// Do not add edges to stores on distinct control-flow paths;
+// only add edges to stores which might interfere.
+//
+// Return the (updated) LCA. There will not be any possibly interfering
+// store between the load's "early block" and the updated LCA.
+// Any stores in the updated LCA will have new precedence edges
+// back to the load. The caller is expected to schedule the load
+// in the LCA, in which case the precedence edges will make LCM
+// preserve anti-dependences. The caller may also hoist the load
+// above the LCA, if it is not the early block.
+Block* PhaseCFG::insert_anti_dependences(Block* LCA, Node* load, bool verify) {
+ assert(load->needs_anti_dependence_check(), "must be a load of some sort");
+ assert(LCA != NULL, "");
+ DEBUG_ONLY(Block* LCA_orig = LCA);
+
+ // Compute the alias index. Loads and stores with different alias indices
+ // do not need anti-dependence edges.
+ uint load_alias_idx = C->get_alias_index(load->adr_type());
+#ifdef ASSERT
+ if (load_alias_idx == Compile::AliasIdxBot && C->AliasLevel() > 0 &&
+ (PrintOpto || VerifyAliases ||
+ (PrintMiscellaneous && (WizardMode || Verbose)))) {
+ // Load nodes should not consume all of memory.
+ // Reporting a bottom type indicates a bug in adlc.
+ // If some particular type of node validly consumes all of memory,
+ // sharpen the preceding "if" to exclude it, so we can catch bugs here.
+ tty->print_cr("*** Possible Anti-Dependence Bug: Load consumes all of memory.");
+ load->dump(2);
+ if (VerifyAliases) assert(load_alias_idx != Compile::AliasIdxBot, "");
+ }
+#endif
+ assert(load_alias_idx || (load->is_Mach() && load->as_Mach()->ideal_Opcode() == Op_StrComp),
+ "String compare is only known 'load' that does not conflict with any stores");
+
+ if (!C->alias_type(load_alias_idx)->is_rewritable()) {
+ // It is impossible to spoil this load by putting stores before it,
+ // because we know that the stores will never update the value
+ // which 'load' must witness.
+ return LCA;
+ }
+
+ node_idx_t load_index = load->_idx;
+
+ // Note the earliest legal placement of 'load', as determined
+ // by the unique point in the dom tree where all memory effects
+ // and other inputs are first available. (Computed by schedule_early.)
+ // For normal loads, 'early' is the shallowest place (dom graph wise)
+ // to look for anti-deps between this load and any store.
+ Block* early = _bbs[load_index];
+
+ // If we are subsuming loads, compute an "early" block that only considers
+ // memory or address inputs. This block may be different than the
+ // schedule_early block in that it could be at an even shallower depth in the
+ // dominator tree, and allow for a broader discovery of anti-dependences.
+ if (C->subsume_loads()) {
+ early = memory_early_block(load, early, _bbs);
+ }
+
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_List worklist_mem(area); // prior memory state to store
+ Node_List worklist_store(area); // possible-def to explore
+ Node_List non_early_stores(area); // all relevant stores outside of early
+ bool must_raise_LCA = false;
+ DEBUG_ONLY(VectorSet should_not_repeat(area));
+
+#ifdef TRACK_PHI_INPUTS
+ // %%% This extra checking fails because MergeMem nodes are not GVNed.
+ // Provide "phi_inputs" to check if every input to a PhiNode is from the
+ // original memory state. This indicates a PhiNode which should not
+ // prevent the load from sinking. For such a block, set_raise_LCA_mark
+ // may be overly conservative.
+ // Mechanism: count inputs seen for each Phi encountered in worklist_store.
+ DEBUG_ONLY(GrowableArray<uint> phi_inputs(area, C->unique(),0,0));
+#endif
+
+ // 'load' uses some memory state; look for users of the same state.
+ // Recurse through MergeMem nodes to the stores that use them.
+
+ // Each of these stores is a possible definition of memory
+ // that 'load' needs to use. We need to force 'load'
+ // to occur before each such store. When the store is in
+ // the same block as 'load', we insert an anti-dependence
+ // edge load->store.
+
+ // The relevant stores "nearby" the load consist of a tree rooted
+ // at initial_mem, with internal nodes of type MergeMem.
+ // Therefore, the branches visited by the worklist are of this form:
+ // initial_mem -> (MergeMem ->)* store
+ // The anti-dependence constraints apply only to the fringe of this tree.
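+ // For example, one search might expand:
+ // initial_mem -> MergeMem -> StoreI (leaf: checked as a possible-def)
+ // initial_mem -> StoreB (leaf: checked as a possible-def)
+ // initial_mem -> MergeMem -> MergeMem -> ... (internal: expanded further)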
+
+ Node* initial_mem = load->in(MemNode::Memory);
+ worklist_store.push(initial_mem);
+ worklist_mem.push(NULL);
+ DEBUG_ONLY(should_not_repeat.test_set(initial_mem->_idx));
+ while (worklist_store.size() > 0) {
+ // Examine a nearby store to see if it might interfere with our load.
+ Node* mem = worklist_mem.pop();
+ Node* store = worklist_store.pop();
+ uint op = store->Opcode();
+
+ // MergeMems do not directly have anti-deps.
+ // Treat them as internal nodes in a forward tree of memory states,
+ // the leaves of which are each a 'possible-def'.
+ if (store == initial_mem // root (exclusive) of tree we are searching
+ || op == Op_MergeMem // internal node of tree we are searching
+ ) {
+ mem = store; // It's not a possibly interfering store.
+ for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+ store = mem->fast_out(i);
+ if (store->is_MergeMem()) {
+ // Be sure we don't get into combinatorial problems.
+ // (Allow phis to be repeated; they can merge two relevant states.)
+ uint k = worklist_store.size();
+ for (; k > 0; k--) {
+ if (worklist_store.at(k-1) == store) break;
+ }
+ if (k > 0) continue; // already on work list; do not repeat
+ DEBUG_ONLY(int repeated = should_not_repeat.test_set(store->_idx));
+ assert(!repeated, "do not walk merges twice");
+ }
+ worklist_mem.push(mem);
+ worklist_store.push(store);
+ }
+ continue;
+ }
+
+ if (op == Op_MachProj || op == Op_Catch) continue;
+ if (store->needs_anti_dependence_check()) continue; // not really a store
+
+ // Compute the alias index. Loads and stores with different alias
+ // indices do not need anti-dependence edges. Wide MemBar's are
+ // anti-dependent on everything (except immutable memories).
+ const TypePtr* adr_type = store->adr_type();
+ if (!C->can_alias(adr_type, load_alias_idx)) continue;
+
+ // Most slow-path runtime calls do NOT modify Java memory, but
+ // they can block and so write Raw memory.
+ if (store->is_Mach()) {
+ MachNode* mstore = store->as_Mach();
+ if (load_alias_idx != Compile::AliasIdxRaw) {
+ // Check for call into the runtime using the Java calling
+ // convention (and from there into a wrapper); it has no
+ // _method. Can't do this optimization for Native calls because
+ // they CAN write to Java memory.
+ if (mstore->ideal_Opcode() == Op_CallStaticJava) {
+ assert(mstore->is_MachSafePoint(), "");
+ MachSafePointNode* ms = (MachSafePointNode*) mstore;
+ assert(ms->is_MachCallJava(), "");
+ MachCallJavaNode* mcj = (MachCallJavaNode*) ms;
+ if (mcj->_method == NULL) {
+ // These runtime calls do not write to Java visible memory
+ // (other than Raw) and so do not require anti-dependence edges.
+ continue;
+ }
+ }
+ // Same for SafePoints: they read/write Raw but only read otherwise.
+ // This is basically a workaround for SafePoints only defining control
+ // instead of control + memory.
+ if (mstore->ideal_Opcode() == Op_SafePoint)
+ continue;
+ } else {
+ // Some raw memory, such as the load of "top" at an allocation,
+ // can be control dependent on the previous safepoint. See
+ // comments in GraphKit::allocate_heap() about control input.
+ // Inserting an anti-dep between such a safepoint and a use
+ // creates a cycle, and will cause a subsequent failure in
+ // local scheduling. (BugId 4919904)
+ // (%%% How can a control input be a safepoint and not a projection??)
+ if (mstore->ideal_Opcode() == Op_SafePoint && load->in(0) == mstore)
+ continue;
+ }
+ }
+
+ // Identify a block that the current load must be above,
+ // or else observe that 'store' is all the way up in the
+ // earliest legal block for 'load'. In the latter case,
+ // immediately insert an anti-dependence edge.
+ Block* store_block = _bbs[store->_idx];
+ assert(store_block != NULL, "unused killing projections skipped above");
+
+ if (store->is_Phi()) {
+ // 'load' uses memory which is one (or more) of the Phi's inputs.
+ // It must be scheduled not before the Phi, but rather before
+ // each of the relevant Phi inputs.
+ //
+ // Instead of finding the LCA of all inputs to a Phi that match 'mem',
+ // we mark each corresponding predecessor block and do a combined
+ // hoisting operation later (raise_LCA_above_marks).
+ //
+ // Do not assert(store_block != early, "Phi merging memory after access")
+ // PhiNode may be at start of block 'early' with backedge to 'early'
+ DEBUG_ONLY(bool found_match = false);
+ for (uint j = PhiNode::Input, jmax = store->req(); j < jmax; j++) {
+ if (store->in(j) == mem) { // Found matching input?
+ DEBUG_ONLY(found_match = true);
+ Block* pred_block = _bbs[store_block->pred(j)->_idx];
+ if (pred_block != early) {
+ // If any predecessor of the Phi matches the load's "early block",
+ // we do not need a precedence edge between the Phi and 'load'
+ // since the load will be forced into a block preceding the Phi.
+ pred_block->set_raise_LCA_mark(load_index);
+ assert(!LCA_orig->dominates(pred_block) ||
+ early->dominates(pred_block), "early is high enough");
+ must_raise_LCA = true;
+ }
+ }
+ }
+ assert(found_match, "no worklist bug");
+#ifdef TRACK_PHI_INPUTS
+#ifdef ASSERT
+ // This assert asks about correct handling of PhiNodes, which may not
+ // have all input edges directly from 'mem'. See BugId 4621264
+ int num_mem_inputs = phi_inputs.at_grow(store->_idx,0) + 1;
+ // Increment by exactly one even if there are multiple copies of 'mem'
+ // coming into the phi, because we will run this block several times
+ // if there are several copies of 'mem'. (That's how DU iterators work.)
+ phi_inputs.at_put(store->_idx, num_mem_inputs);
+ assert(PhiNode::Input + num_mem_inputs < store->req(),
+ "Expect at least one phi input will not be from original memory state");
+#endif //ASSERT
+#endif //TRACK_PHI_INPUTS
+ } else if (store_block != early) {
+ // 'store' is between the current LCA and earliest possible block.
+ // Label its block, and decide later on how to raise the LCA
+ // to include the effect on LCA of this store.
+ // If this store's block gets chosen as the raised LCA, we
+ // will find him on the non_early_stores list and stick him
+ // with a precedence edge.
+ // (But, don't bother if LCA is already raised all the way.)
+ if (LCA != early) {
+ store_block->set_raise_LCA_mark(load_index);
+ must_raise_LCA = true;
+ non_early_stores.push(store);
+ }
+ } else {
+ // Found a possibly-interfering store in the load's 'early' block.
+ // This means 'load' cannot sink at all in the dominator tree.
+ // Add an anti-dep edge, and squeeze 'load' into the highest block.
+ assert(store != load->in(0), "dependence cycle found");
+ if (verify) {
+ assert(store->find_edge(load) != -1, "missing precedence edge");
+ } else {
+ store->add_prec(load);
+ }
+ LCA = early;
+ // This turns off the process of gathering non_early_stores.
+ }
+ }
+ // (Worklist is now empty; all nearby stores have been visited.)
+
+ // Finished if 'load' must be scheduled in its 'early' block.
+ // If we found any stores there, they have already been given
+ // precedence edges.
+ if (LCA == early) return LCA;
+
+ // We get here only if there are no possibly-interfering stores
+ // in the load's 'early' block. Move LCA up above all predecessors
+ // which contain stores we have noted.
+ //
+ // The raised LCA block can be a home to such interfering stores,
+ // but its predecessors must not contain any such stores.
+ //
+ // The raised LCA will be a lower bound for placing the load,
+ // preventing the load from sinking past any block containing
+ // a store that may invalidate the memory state required by 'load'.
+ if (must_raise_LCA)
+ LCA = raise_LCA_above_marks(LCA, load->_idx, early, _bbs);
+ if (LCA == early) return LCA;
+
+ // Insert anti-dependence edges from 'load' to each store
+ // in the non-early LCA block.
+ // Mine the non_early_stores list for such stores.
+ if (LCA->raise_LCA_mark() == load_index) {
+ while (non_early_stores.size() > 0) {
+ Node* store = non_early_stores.pop();
+ Block* store_block = _bbs[store->_idx];
+ if (store_block == LCA) {
+ // add anti_dependence from store to load in its own block
+ assert(store != load->in(0), "dependence cycle found");
+ if (verify) {
+ assert(store->find_edge(load) != -1, "missing precedence edge");
+ } else {
+ store->add_prec(load);
+ }
+ } else {
+ assert(store_block->raise_LCA_mark() == load_index, "block was marked");
+ // Any other stores we found must be either inside the new LCA
+ // or else outside the original LCA. In the latter case, they
+ // did not interfere with any use of 'load'.
+ assert(LCA->dominates(store_block)
+ || !LCA_orig->dominates(store_block), "no stray stores");
+ }
+ }
+ }
+
+ // Return the highest block containing stores; any stores
+ // within that block have been given anti-dependence edges.
+ return LCA;
+}
+
+// This class is used to iterate backwards over the nodes in the graph.
+
+class Node_Backward_Iterator {
+
+private:
+ Node_Backward_Iterator();
+
+public:
+ // Constructor for the iterator
+ Node_Backward_Iterator(Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs);
+
+ // Return the next node in the backward walk, or NULL when the walk is done
+ Node *next();
+
+private:
+ VectorSet &_visited;
+ Node_List &_stack;
+ Block_Array &_bbs;
+};
+
+// Constructor for the Node_Backward_Iterator
+Node_Backward_Iterator::Node_Backward_Iterator( Node *root, VectorSet &visited, Node_List &stack, Block_Array &bbs )
+ : _visited(visited), _stack(stack), _bbs(bbs) {
+ // The stack should contain exactly the root
+ stack.clear();
+ stack.push(root);
+
+ // Clear the visited bits
+ visited.Clear();
+}
+
+// Advance the Node_Backward_Iterator and return the next node
+Node *Node_Backward_Iterator::next() {
+
+ // If the _stack is empty, then just return NULL: finished.
+ if ( !_stack.size() )
+ return NULL;
+
+ // '_stack' is emulating a real _stack. The 'visit-all-users' loop has been
+ // made stateless, so I do not need to record the index 'i' on my _stack.
+ // Instead I visit all users each time, scanning for unvisited users.
+ // I visit unvisited not-anti-dependence users first, then anti-dependent
+ // children next.
+ Node *self = _stack.pop();
+
+ // I cycle here when I am entering a deeper level of recursion.
+ // The key variable 'self' was set prior to jumping here.
+ while( 1 ) {
+
+ _visited.set(self->_idx);
+
+ // Now schedule all uses as late as possible.
+ uint src = self->is_Proj() ? self->in(0)->_idx : self->_idx;
+ uint src_rpo = _bbs[src]->_rpo;
+
+ // Schedule all nodes in a post-order visit
+ Node *unvisited = NULL; // Unvisited anti-dependent Node, if any
+
+ // Scan for unvisited nodes
+ for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
+ // For all uses, schedule late
+ Node* n = self->fast_out(i); // Use
+
+ // Skip already visited children
+ if ( _visited.test(n->_idx) )
+ continue;
+
+ // do not traverse backward control edges
+ Node *use = n->is_Proj() ? n->in(0) : n;
+ uint use_rpo = _bbs[use->_idx]->_rpo;
+
+ if ( use_rpo < src_rpo )
+ continue;
+
+ // Phi nodes always precede uses in a basic block
+ if ( use_rpo == src_rpo && use->is_Phi() )
+ continue;
+
+ unvisited = n; // Found unvisited
+
+ // Check for possible-anti-dependent
+ if( !n->needs_anti_dependence_check() )
+ break; // Not visited, not anti-dep; schedule it NOW
+ }
+
+ // Did I find an unvisited not-anti-dependent Node?
+ if ( !unvisited )
+ break; // All done with children; post-visit 'self'
+
+ // Visit the unvisited Node. Contains the obvious push to
+ // indicate I'm entering a deeper level of recursion. I push the
+ // old state onto the _stack and set a new state and loop (recurse).
+ _stack.push(self);
+ self = unvisited;
+ } // End recursion loop
+
+ return self;
+}
+
+//------------------------------ComputeLatenciesBackwards----------------------
+// Compute the latency of all the instructions.
+void PhaseCFG::ComputeLatenciesBackwards(VectorSet &visited, Node_List &stack) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining())
+ tty->print("\n#---- ComputeLatenciesBackwards ----\n");
+#endif
+
+ Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+ Node *n;
+
+ // Walk over all the nodes from last to first
+ while ((n = iter.next()) != NULL) {
+ // Set the latency for the definitions of this instruction
+ partial_latency_of_defs(n);
+ }
+} // end ComputeLatenciesBackwards
+
+//------------------------------partial_latency_of_defs------------------------
+// Compute the latency impact of this node on all defs. This computes
+// a number that increases as we approach the beginning of the routine.
+void PhaseCFG::partial_latency_of_defs(Node *n) {
+ // Set the latency for this instruction
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# latency_to_inputs: node_latency[%d] = %d for node",
+ n->_idx, _node_latency.at_grow(n->_idx));
+ dump();
+ }
+#endif
+
+ if (n->is_Proj())
+ n = n->in(0);
+
+ if (n->is_Root())
+ return;
+
+ uint nlen = n->len();
+ uint use_latency = _node_latency.at_grow(n->_idx);
+ uint use_pre_order = _bbs[n->_idx]->_pre_order;
+
+ for ( uint j=0; j<nlen; j++ ) {
+ Node *def = n->in(j);
+
+ if (!def || def == n)
+ continue;
+
+ // Walk backwards thru projections
+ if (def->is_Proj())
+ def = def->in(0);
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# in(%2d): ", j);
+ def->dump();
+ }
+#endif
+
+ // If the defining block is not known, assume it is ok
+ Block *def_block = _bbs[def->_idx];
+ uint def_pre_order = def_block ? def_block->_pre_order : 0;
+
+ if ( (use_pre_order < def_pre_order) ||
+ (use_pre_order == def_pre_order && n->is_Phi()) )
+ continue;
+
+ uint delta_latency = n->latency(j);
+ uint current_latency = delta_latency + use_latency;
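+ // For example, if this use must be ready at latency 7 and the edge
+ // contributes 3, the def's latency is raised to at least 10; the maximum
+ // over all uses is kept.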
+
+ if (_node_latency.at_grow(def->_idx) < current_latency) {
+ _node_latency.at_put_grow(def->_idx, current_latency);
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# %d + edge_latency(%d) == %d -> %d, node_latency[%d] = %d",
+ use_latency, j, delta_latency, current_latency, def->_idx,
+ _node_latency.at_grow(def->_idx));
+ }
+#endif
+ }
+}
+
+//------------------------------latency_from_use-------------------------------
+// Compute the latency of a specific use
+int PhaseCFG::latency_from_use(Node *n, const Node *def, Node *use) {
+ // If self-reference, return no latency
+ if (use == n || use->is_Root())
+ return 0;
+
+ uint def_pre_order = _bbs[def->_idx]->_pre_order;
+ uint latency = 0;
+
+ // If the use is not a projection, then it is simple...
+ if (!use->is_Proj()) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# out(): ");
+ use->dump();
+ }
+#endif
+
+ uint use_pre_order = _bbs[use->_idx]->_pre_order;
+
+ if (use_pre_order < def_pre_order)
+ return 0;
+
+ if (use_pre_order == def_pre_order && use->is_Phi())
+ return 0;
+
+ uint nlen = use->len();
+ uint nl = _node_latency.at_grow(use->_idx);
+
+ for ( uint j=0; j<nlen; j++ ) {
+ if (use->in(j) == n) {
+ // Change this if we want local latencies
+ uint ul = use->latency(j);
+ uint l = ul + nl;
+ if (latency < l) latency = l;
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# %d + edge_latency(%d) == %d -> %d, latency = %d",
+ nl, j, ul, l, latency);
+ }
+#endif
+ }
+ }
+ } else {
+ // This is a projection, just grab the latency of the use(s)
+ for (DUIterator_Fast jmax, j = use->fast_outs(jmax); j < jmax; j++) {
+ uint l = latency_from_use(use, def, use->fast_out(j));
+ if (latency < l) latency = l;
+ }
+ }
+
+ return latency;
+}
+
+//------------------------------latency_from_uses------------------------------
+// Compute the latency of this instruction relative to all of its uses.
+// This computes a number that increases as we approach the beginning of the
+// routine.
+void PhaseCFG::latency_from_uses(Node *n) {
+ // Set the latency for this instruction
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# latency_from_outputs: node_latency[%d] = %d for node",
+ n->_idx, _node_latency.at_grow(n->_idx));
+ dump();
+ }
+#endif
+ uint latency=0;
+ const Node *def = n->is_Proj() ? n->in(0): n;
+
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ uint l = latency_from_use(n, def, n->fast_out(i));
+
+ if (latency < l) latency = l;
+ }
+
+ _node_latency.at_put_grow(n->_idx, latency);
+}
+
+//------------------------------hoist_to_cheaper_block-------------------------
+// Pick a block for node self, between early and LCA, that is a cheaper
+// alternative to LCA.
+Block* PhaseCFG::hoist_to_cheaper_block(Block* LCA, Block* early, Node* self) {
+ const double delta = 1+PROB_UNLIKELY_MAG(4);
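+ // 'delta' lets a candidate block win even when its frequency is negligibly
+ // higher than the best seen so far, provided the latency conditions checked
+ // below also hold.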
+ Block* least = LCA;
+ double least_freq = least->_freq;
+ uint target = _node_latency.at_grow(self->_idx);
+ uint start_latency = _node_latency.at_grow(LCA->_nodes[0]->_idx);
+ uint end_latency = _node_latency.at_grow(LCA->_nodes[LCA->end_idx()]->_idx);
+ bool in_latency = (target <= start_latency);
+ const Block* root_block = _bbs[_root->_idx];
+
+ // Turn off latency scheduling if scheduling is just plain off
+ if (!C->do_scheduling())
+ in_latency = true;
+
+ // Do not hoist (to cover latency) instructions which target a
+ // single register. Hoisting stretches the live range of the
+ // single register and may force spilling.
+ MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
+ if (mach && mach->out_RegMask().is_bound1() && mach->out_RegMask().is_NotEmpty())
+ in_latency = true;
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("# Find cheaper block for latency %d: ",
+ _node_latency.at_grow(self->_idx));
+ self->dump();
+ tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
+ LCA->_pre_order,
+ LCA->_nodes[0]->_idx,
+ start_latency,
+ LCA->_nodes[LCA->end_idx()]->_idx,
+ end_latency,
+ least_freq);
+ }
+#endif
+
+ // Walk up the dominator tree from LCA (Lowest common ancestor) to
+ // the earliest legal location. Capture the least execution frequency.
+ while (LCA != early) {
+ LCA = LCA->_idom; // Follow up the dominator tree
+
+ if (LCA == NULL) {
+ // Bailout without retry
+ C->record_method_not_compilable("late schedule failed: LCA == NULL");
+ return least;
+ }
+
+ // Don't hoist machine instructions to the root basic block
+ if (mach && LCA == root_block)
+ break;
+
+ uint start_lat = _node_latency.at_grow(LCA->_nodes[0]->_idx);
+ uint end_idx = LCA->end_idx();
+ uint end_lat = _node_latency.at_grow(LCA->_nodes[end_idx]->_idx);
+ double LCA_freq = LCA->_freq;
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# B%d: start latency for [%4d]=%d, end latency for [%4d]=%d, freq=%g",
+ LCA->_pre_order, LCA->_nodes[0]->_idx, start_lat, end_idx, end_lat, LCA_freq);
+ }
+#endif
+ if (LCA_freq < least_freq || // Better Frequency
+ ( !in_latency && // No block containing latency
+ LCA_freq < least_freq * delta && // No worse frequency
+ target >= end_lat && // within latency range
+ !self->is_iteratively_computed() ) // But don't hoist IV increments
+ // because they may end up above other uses of their phi forcing
+ // their result register to be different from their input.
+ ) {
+ least = LCA; // Found cheaper block
+ least_freq = LCA_freq;
+ start_latency = start_lat;
+ end_latency = end_lat;
+ if (target <= start_lat)
+ in_latency = true;
+ }
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# Choose block B%d with start latency=%d and freq=%g",
+ least->_pre_order, start_latency, least_freq);
+ }
+#endif
+
+ // See if the latency needs to be updated
+ if (target < end_latency) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print_cr("# Change latency for [%4d] from %d to %d", self->_idx, target, end_latency);
+ }
+#endif
+ _node_latency.at_put_grow(self->_idx, end_latency);
+ partial_latency_of_defs(self);
+ }
+
+ return least;
+}
+
+
+//------------------------------schedule_late-----------------------------------
+// Now schedule all codes as LATE as possible. This is the LCA in the
+// dominator tree of all USES of a value. Pick the block with the least
+// loop nesting depth that is lowest in the dominator tree.
+extern const char must_clone[];
+void PhaseCFG::schedule_late(VectorSet &visited, Node_List &stack) {
+#ifndef PRODUCT
+ if (trace_opto_pipelining())
+ tty->print("\n#---- schedule_late ----\n");
+#endif
+
+ Node_Backward_Iterator iter((Node *)_root, visited, stack, _bbs);
+ Node *self;
+
+ // Walk over all the nodes from last to first
+ while ((self = iter.next()) != NULL) {
+ Block* early = _bbs[self->_idx]; // Earliest legal placement
+
+ if (self->is_top()) {
+ // Top node goes in bb #2 with other constants.
+ // It must be special-cased, because it has no out edges.
+ early->add_inst(self);
+ continue;
+ }
+
+ // No uses, just terminate
+ if (self->outcnt() == 0) {
+ assert(self->Opcode() == Op_MachProj, "sanity");
+ continue; // Must be a dead machine projection
+ }
+
+ // If node is pinned in the block, then no scheduling can be done.
+ if( self->pinned() ) // Pinned in block?
+ continue;
+
+ MachNode* mach = self->is_Mach() ? self->as_Mach() : NULL;
+ if (mach) {
+ switch (mach->ideal_Opcode()) {
+ case Op_CreateEx:
+ // Don't move exception creation
+ early->add_inst(self);
+ continue;
+ break;
+ case Op_CheckCastPP:
+ // Don't move CheckCastPP nodes away from their input, if the input
+ // is a rawptr (5071820).
+ Node *def = self->in(1);
+ if (def != NULL && def->bottom_type()->base() == Type::RawPtr) {
+ early->add_inst(self);
+ continue;
+ }
+ break;
+ }
+ }
+
+ // Gather LCA of all uses
+ Block *LCA = NULL;
+ {
+ for (DUIterator_Fast imax, i = self->fast_outs(imax); i < imax; i++) {
+ // For all uses, find LCA
+ Node* use = self->fast_out(i);
+ LCA = raise_LCA_above_use(LCA, use, self, _bbs);
+ }
+ } // (Hide defs of imax, i from rest of block.)
+
+ // Place temps in the block of their use. This isn't a
+ // requirement for correctness but it reduces useless
+ // interference between temps and other nodes.
+ if (mach != NULL && mach->is_MachTemp()) {
+ _bbs.map(self->_idx, LCA);
+ LCA->add_inst(self);
+ continue;
+ }
+
+ // Check if 'self' could be anti-dependent on memory
+ if (self->needs_anti_dependence_check()) {
+ // Hoist LCA above possible-defs and insert anti-dependences to
+ // defs in new LCA block.
+ LCA = insert_anti_dependences(LCA, self);
+ }
+
+ if (early->_dom_depth > LCA->_dom_depth) {
+ // Somehow the LCA has moved above the earliest legal point.
+ // (One way this can happen is via memory_early_block.)
+ if (C->subsume_loads() == true && !C->failing()) {
+ // Retry with subsume_loads == false
+ // If this is the first failure, the sentinel string will "stick"
+ // to the Compile object, and the C2Compiler will see it and retry.
+ C->record_failure(C2Compiler::retry_no_subsuming_loads());
+ } else {
+ // Bailout without retry when (early->_dom_depth > LCA->_dom_depth)
+ C->record_method_not_compilable("late schedule failed: incorrect graph");
+ }
+ return;
+ }
+
+ // If there is no opportunity to hoist, then we're done.
+ bool try_to_hoist = (LCA != early);
+
+ // Must clone guys stay next to use; no hoisting allowed.
+ // Also cannot hoist guys that alter memory or are otherwise not
+ // allocatable (hoisting can make a value live longer, leading to
+ // anti and output dependency problems which are normally resolved
+ // by the register allocator giving everyone a different register).
+ if (mach != NULL && must_clone[mach->ideal_Opcode()])
+ try_to_hoist = false;
+
+ Block* late = NULL;
+ if (try_to_hoist) {
+ // Now find the block with the least execution frequency.
+ // Start at the latest schedule and work up to the earliest schedule
+ // in the dominator tree. Thus the Node will dominate all its uses.
+ late = hoist_to_cheaper_block(LCA, early, self);
+ } else {
+ // Just use the LCA of the uses.
+ late = LCA;
+ }
+
+ // Put the node into target block
+ schedule_node_into_block(self, late);
+
+#ifdef ASSERT
+ if (self->needs_anti_dependence_check()) {
+ // since precedence edges are only inserted when we're sure they
+ // are needed make sure that after placement in a block we don't
+ // need any new precedence edges.
+ verify_anti_dependences(late, self);
+ }
+#endif
+ } // Loop until all nodes have been visited
+
+} // end schedule_late
+
+//------------------------------GlobalCodeMotion-------------------------------
+void PhaseCFG::GlobalCodeMotion( Matcher &matcher, uint unique, Node_List &proj_list ) {
+ ResourceMark rm;
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Start GlobalCodeMotion ----\n");
+ }
+#endif
+
+ // Initialize the bbs.map for things on the proj_list
+ uint i;
+ for( i=0; i < proj_list.size(); i++ )
+ _bbs.map(proj_list[i]->_idx, NULL);
+
+ // Set the basic block for Nodes pinned into blocks
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ schedule_pinned_nodes( visited );
+
+ // Find the earliest Block any instruction can be placed in. Some
+ // instructions are pinned into Blocks. Unpinned instructions can
+ // appear in the last block in which all their inputs occur.
+ visited.Clear();
+ Node_List stack(a);
+ stack.map( (unique >> 1) + 16, NULL); // Pre-grow the list
+ if (!schedule_early(visited, stack)) {
+ // Bailout without retry
+ C->record_method_not_compilable("early schedule failed");
+ return;
+ }
+
+ // Build Def-Use edges.
+ proj_list.push(_root); // Add real root as another root
+ proj_list.pop();
+
+ // Compute the latency information (via backwards walk) for all the
+ // instructions in the graph
+ GrowableArray<uint> node_latency;
+ _node_latency = node_latency;
+
+ if( C->do_scheduling() )
+ ComputeLatenciesBackwards(visited, stack);
+
+ // Now schedule all codes as LATE as possible. This is the LCA in the
+ // dominator tree of all USES of a value. Pick the block with the least
+ // loop nesting depth that is lowest in the dominator tree.
+ // ( visited.Clear() called in schedule_late()->Node_Backward_Iterator() )
+ schedule_late(visited, stack);
+ if( C->failing() ) {
+ // schedule_late fails only when graph is incorrect.
+ assert(!VerifyGraphEdges, "verification should have failed");
+ return;
+ }
+
+ unique = C->unique();
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Detect implicit null checks ----\n");
+ }
+#endif
+
+ // Detect implicit-null-check opportunities. Basically, find NULL checks
+ // with suitable memory ops nearby. Use the memory op to do the NULL check.
+ // I can generate a memory op if there is not one nearby.
+ if (C->is_method_compilation()) {
+ // Don't do it for natives, adapters, or runtime stubs
+ int allowed_reasons = 0;
+ // ...and don't do it when there have been too many traps, globally.
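+ // allowed_reasons is a bit mask over deopt reasons: a set bit marks a
+ // reason that has not trapped too often and so may still be used by
+ // implicit_null_check below.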
+ for (int reason = (int)Deoptimization::Reason_none+1;
+ reason < Compile::trapHistLength; reason++) {
+ assert(reason < BitsPerInt, "recode bit map");
+ if (!C->too_many_traps((Deoptimization::DeoptReason) reason))
+ allowed_reasons |= nth_bit(reason);
+ }
+ // By reversing the loop direction we get a very minor gain on mpegaudio.
+ // Feel free to revert to a forward loop for clarity.
+ // for( int i=0; i < (int)matcher._null_check_tests.size(); i+=2 ) {
+ for( int i= matcher._null_check_tests.size()-2; i>=0; i-=2 ) {
+ Node *proj = matcher._null_check_tests[i ];
+ Node *val = matcher._null_check_tests[i+1];
+ _bbs[proj->_idx]->implicit_null_check(this, proj, val, allowed_reasons);
+ // The implicit_null_check will only perform the transformation
+ // if the null branch is truly uncommon, *and* it leads to an
+ // uncommon trap. Combined with the too_many_traps guards
+ // above, this prevents SEGV storms reported in 6366351,
+ // by recompiling offending methods without this optimization.
+ }
+ }
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- Start Local Scheduling ----\n");
+ }
+#endif
+
+ // Schedule locally. Right now a simple topological sort.
+ // Later, do a real latency aware scheduler.
+ int *ready_cnt = NEW_RESOURCE_ARRAY(int,C->unique());
+ memset( ready_cnt, -1, C->unique() * sizeof(int) );
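+ // (memset with -1 writes byte 0xFF everywhere, leaving each int entry
+ // equal to -1 until local scheduling computes the real counts.)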
+ visited.Clear();
+ for (i = 0; i < _num_blocks; i++) {
+ if (!_blocks[i]->schedule_local(this, matcher, ready_cnt, visited)) {
+ if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
+ C->record_method_not_compilable("local schedule failed");
+ }
+ return;
+ }
+ }
+
+ // If we inserted any instructions between a Call and his CatchNode,
+ // clone the instructions on all paths below the Catch.
+ for( i=0; i < _num_blocks; i++ )
+ _blocks[i]->call_catch_cleanup(_bbs);
+
+#ifndef PRODUCT
+ if (trace_opto_pipelining()) {
+ tty->print("\n---- After GlobalCodeMotion ----\n");
+ for (uint i = 0; i < _num_blocks; i++) {
+ _blocks[i]->dump();
+ }
+ }
+#endif
+}
+
+
+//------------------------------Estimate_Block_Frequency-----------------------
+// Estimate block frequencies based on IfNode probabilities.
+void PhaseCFG::Estimate_Block_Frequency() {
+ int cnts = C->method() ? C->method()->interpreter_invocation_count() : 1;
+ // Most of our algorithms will die horribly if frequency can become
+ // negative so make sure cnts is a sane value.
+ if( cnts <= 0 ) cnts = 1;
+ float f = (float)cnts/(float)FreqCountInvocations;
+
+ // Create the loop tree and calculate loop depth.
+ _root_loop = create_loop_tree();
+ _root_loop->compute_loop_depth(0);
+
+ // Compute block frequency of each block, relative to a single loop entry.
+ _root_loop->compute_freq();
+
+ // Adjust all frequencies to be relative to a single method entry
+ _root_loop->_freq = f * 1.0;
+ _root_loop->scale_freq();
+
+ // force paths ending at uncommon traps to be infrequent
+ Block_List worklist;
+ Block* root_blk = _blocks[0];
+ for (uint i = 0; i < root_blk->num_preds(); i++) {
+ Block *pb = _bbs[root_blk->pred(i)->_idx];
+ if (pb->has_uncommon_code()) {
+ worklist.push(pb);
+ }
+ }
+ while (worklist.size() > 0) {
+ Block* uct = worklist.pop();
+ uct->_freq = PROB_MIN;
+ for (uint i = 0; i < uct->num_preds(); i++) {
+ Block *pb = _bbs[uct->pred(i)->_idx];
+ if (pb->_num_succs == 1 && pb->_freq > PROB_MIN) {
+ worklist.push(pb);
+ }
+ }
+ }
+
+#ifndef PRODUCT
+ if (PrintCFGBlockFreq) {
+ tty->print_cr("CFG Block Frequencies");
+ _root_loop->dump_tree();
+ if (Verbose) {
+ tty->print_cr("PhaseCFG dump");
+ dump();
+ tty->print_cr("Node dump");
+ _root->dump(99999);
+ }
+ }
+#endif
+}
+
+//----------------------------create_loop_tree--------------------------------
+// Create a loop tree from the CFG
+CFGLoop* PhaseCFG::create_loop_tree() {
+
+#ifdef ASSERT
+ assert( _blocks[0] == _broot, "" );
+ for (uint i = 0; i < _num_blocks; i++ ) {
+ Block *b = _blocks[i];
+ // Check that _loop field are clear...we could clear them if not.
+ assert(b->_loop == NULL, "clear _loop expected");
+ // Sanity check that the RPO numbering is reflected in the _blocks array.
+ // It doesn't have to be for the loop tree to be built, but if it is not,
+ // then the blocks have been reordered since dom graph building...which
+ // calls the RPO numbering into question.
+ assert(b->_rpo == i, "unexpected reverse post order number");
+ }
+#endif
+
+ int idct = 0;
+ CFGLoop* root_loop = new CFGLoop(idct++);
+
+ Block_List worklist;
+
+ // Assign blocks to loops
+ for(uint i = _num_blocks - 1; i > 0; i-- ) { // skip Root block
+ Block *b = _blocks[i];
+
+ if (b->head()->is_Loop()) {
+ Block* loop_head = b;
+ assert(loop_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
+ Node* tail_n = loop_head->pred(LoopNode::LoopBackControl);
+ Block* tail = _bbs[tail_n->_idx];
+
+ // Defensively filter out Loop nodes for non-single-entry loops.
+ // For all reasonable loops, the head occurs before the tail in RPO.
+ if (i <= tail->_rpo) {
+
+ // The tail and (recursive) predecessors of the tail
+ // are made members of a new loop.
+
+ assert(worklist.size() == 0, "nonempty worklist");
+ CFGLoop* nloop = new CFGLoop(idct++);
+ assert(loop_head->_loop == NULL, "just checking");
+ loop_head->_loop = nloop;
+ // Add to nloop so push_pred() will skip over inner loops
+ nloop->add_member(loop_head);
+ nloop->push_pred(loop_head, LoopNode::LoopBackControl, worklist, _bbs);
+
+ while (worklist.size() > 0) {
+ Block* member = worklist.pop();
+ if (member != loop_head) {
+ for (uint j = 1; j < member->num_preds(); j++) {
+ nloop->push_pred(member, j, worklist, _bbs);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Create a member list for each loop consisting
+ // of both blocks and (immediate child) loops.
+ for (uint i = 0; i < _num_blocks; i++) {
+ Block *b = _blocks[i];
+ CFGLoop* lp = b->_loop;
+ if (lp == NULL) {
+ // Not assigned to a loop. Add it to the method's pseudo loop.
+ b->_loop = root_loop;
+ lp = root_loop;
+ }
+ if (lp == root_loop || b != lp->head()) { // loop heads are already members
+ lp->add_member(b);
+ }
+ if (lp != root_loop) {
+ if (lp->parent() == NULL) {
+ // Not a nested loop. Make it a child of the method's pseudo loop.
+ root_loop->add_nested_loop(lp);
+ }
+ if (b == lp->head()) {
+ // Add nested loop to member list of parent loop.
+ lp->parent()->add_member(lp);
+ }
+ }
+ }
+
+ return root_loop;
+}
+
+//------------------------------push_pred--------------------------------------
+void CFGLoop::push_pred(Block* blk, int i, Block_List& worklist, Block_Array& node_to_blk) {
+ Node* pred_n = blk->pred(i);
+ Block* pred = node_to_blk[pred_n->_idx];
+ CFGLoop *pred_loop = pred->_loop;
+ if (pred_loop == NULL) {
+ // Filter out blocks for non-single-entry loops.
+ // For all reasonable loops, the head occurs before the tail in RPO.
+ if (pred->_rpo > head()->_rpo) {
+ pred->_loop = this;
+ worklist.push(pred);
+ }
+ } else if (pred_loop != this) {
+ // Nested loop.
+ while (pred_loop->_parent != NULL && pred_loop->_parent != this) {
+ pred_loop = pred_loop->_parent;
+ }
+ // Make pred's loop be a child
+ if (pred_loop->_parent == NULL) {
+ add_nested_loop(pred_loop);
+ // Continue with loop entry predecessor.
+ Block* pred_head = pred_loop->head();
+ assert(pred_head->num_preds() - 1 == 2, "loop must have 2 predecessors");
+ assert(pred_head != head(), "loop head in only one loop");
+ push_pred(pred_head, LoopNode::EntryControl, worklist, node_to_blk);
+ } else {
+ assert(pred_loop->_parent == this && _parent == NULL, "just checking");
+ }
+ }
+}
+
+//------------------------------add_nested_loop--------------------------------
+// Make cl a child of the current loop in the loop tree.
+void CFGLoop::add_nested_loop(CFGLoop* cl) {
+ assert(_parent == NULL, "no parent yet");
+ assert(cl != this, "not my own parent");
+ cl->_parent = this;
+ CFGLoop* ch = _child;
+ if (ch == NULL) {
+ _child = cl;
+ } else {
+ while (ch->_sibling != NULL) { ch = ch->_sibling; }
+ ch->_sibling = cl;
+ }
+}
+
+//------------------------------compute_loop_depth-----------------------------
+// Store the loop depth in each CFGLoop object.
+// Recursively walk the children to do the same for them.
+void CFGLoop::compute_loop_depth(int depth) {
+ _depth = depth;
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->compute_loop_depth(depth + 1);
+ ch = ch->_sibling;
+ }
+}
+
+//------------------------------compute_freq-----------------------------------
+// Compute the frequency of each block and loop, relative to a single entry
+// into the dominating loop head.
+void CFGLoop::compute_freq() {
+ // Bottom up traversal of loop tree (visit inner loops first.)
+ // Set loop head frequency to 1.0, then transitively
+ // compute frequency for all successors in the loop,
+ // as well as for each exit edge. Inner loops are
+ // treated as single blocks with loop exit targets
+ // as the successor blocks.
+
+ // Nested loops first
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->compute_freq();
+ ch = ch->_sibling;
+ }
+ assert (_members.length() > 0, "no empty loops");
+ Block* hd = head();
+ hd->_freq = 1.0f;
+ for (int i = 0; i < _members.length(); i++) {
+ CFGElement* s = _members.at(i);
+ float freq = s->_freq;
+ if (s->is_block()) {
+ Block* b = s->as_Block();
+ for (uint j = 0; j < b->_num_succs; j++) {
+ Block* sb = b->_succs[j];
+ update_succ_freq(sb, freq * b->succ_prob(j));
+ }
+ } else {
+ CFGLoop* lp = s->as_CFGLoop();
+ assert(lp->_parent == this, "immediate child");
+ for (int k = 0; k < lp->_exits.length(); k++) {
+ Block* eb = lp->_exits.at(k).get_target();
+ float prob = lp->_exits.at(k).get_prob();
+ update_succ_freq(eb, freq * prob);
+ }
+ }
+ }
+
+#if 0
+ // Raise frequency of the loop backedge block, in an effort
+ // to keep it empty. Skip the method level "loop".
+ if (_parent != NULL) {
+ CFGElement* s = _members.at(_members.length() - 1);
+ if (s->is_block()) {
+ Block* bk = s->as_Block();
+ if (bk->_num_succs == 1 && bk->_succs[0] == hd) {
+ // almost any value >= 1.0f works
+ // FIXME: raw constant
+ bk->_freq = 1.05f;
+ }
+ }
+ }
+#endif
+
+ // For all loops other than the outer, "method" loop,
+ // sum and normalize the exit probability. The "method" loop
+ // should keep the initial exit probability of 1, so that
+ // inner blocks do not get erroneously scaled.
+ if (_depth != 0) {
+ // Total the exit probabilities for this loop.
+ float exits_sum = 0.0f;
+ for (int i = 0; i < _exits.length(); i++) {
+ exits_sum += _exits.at(i).get_prob();
+ }
+
+ // Normalize the exit probabilities. Until now, the
+ // probabilities estimate the possibility of exit per
+ // a single loop iteration; afterward, they estimate
+ // the probability of exit per loop entry.
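+ // For example, two exits with per-iteration probabilities 0.01 and 0.03
+ // normalize to 0.25 and 0.75 of the per-entry exit probability.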
+ for (int i = 0; i < _exits.length(); i++) {
+ Block* et = _exits.at(i).get_target();
+ float new_prob = _exits.at(i).get_prob() / exits_sum;
+ BlockProbPair bpp(et, new_prob);
+ _exits.at_put(i, bpp);
+ }
+
+ // Save the total, but guard against unreasonable probability,
+ // as the value is used to estimate the loop trip count.
+ // An infinite trip count would blur relative block
+ // frequencies.
+ if (exits_sum > 1.0f) exits_sum = 1.0f;
+ if (exits_sum < PROB_MIN) exits_sum = PROB_MIN;
+ _exit_prob = exits_sum;
+ }
+}
+
+//------------------------------succ_prob-------------------------------------
+// Determine the probability of reaching successor 'i' from the receiver block.
+float Block::succ_prob(uint i) {
+ int eidx = end_idx();
+ Node *n = _nodes[eidx]; // Get ending Node
+ int op = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : n->Opcode();
+
+ // Switch on branch type
+ switch( op ) {
+ case Op_CountedLoopEnd:
+ case Op_If: {
+ assert (i < 2, "just checking");
+ // Conditionals pass on only part of their frequency
+ float prob = n->as_MachIf()->_prob;
+ assert(prob >= 0.0 && prob <= 1.0, "out of range probability");
+ // If succ[i] is the FALSE branch, invert path info
+ if( _nodes[i + eidx + 1]->Opcode() == Op_IfFalse ) {
+ return 1.0f - prob; // not taken
+ } else {
+ return prob; // taken
+ }
+ }
+
+ case Op_Jump:
+ // Divide the frequency between all successors evenly
+ return 1.0f/_num_succs;
+
+ case Op_Catch: {
+ const CatchProjNode *ci = _nodes[i + eidx + 1]->as_CatchProj();
+ if (ci->_con == CatchProjNode::fall_through_index) {
+ // Fall-thru path gets the lion's share.
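+ // (PROB_UNLIKELY_MAG(5) is a tiny magnitude, so with a handful of
+ // successors the fall-through path keeps essentially all the frequency.)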
+ return 1.0f - PROB_UNLIKELY_MAG(5)*_num_succs;
+ } else {
+ // Presume exceptional paths are equally unlikely
+ return PROB_UNLIKELY_MAG(5);
+ }
+ }
+
+ case Op_Root:
+ case Op_Goto:
+ // Pass frequency straight thru to target
+ return 1.0f;
+
+ case Op_NeverBranch:
+ return 0.0f;
+
+ case Op_TailCall:
+ case Op_TailJump:
+ case Op_Return:
+ case Op_Halt:
+ case Op_Rethrow:
+ // Do not push out freq to root block
+ return 0.0f;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ return 0.0f;
+}
+
+//------------------------------update_succ_freq-------------------------------
+// Update the appropriate frequency associated with block 'b', a successor of
+// a block in this loop.
+void CFGLoop::update_succ_freq(Block* b, float freq) {
+ if (b->_loop == this) {
+ if (b == head()) {
+ // back branch within the loop
+ // Do nothing now; the loop-carried frequency will be
+ // adjusted later in scale_freq().
+ } else {
+ // simple branch within the loop
+ b->_freq += freq;
+ }
+ } else if (!in_loop_nest(b)) {
+ // branch is exit from this loop
+ BlockProbPair bpp(b, freq);
+ _exits.append(bpp);
+ } else {
+ // branch into nested loop
+ CFGLoop* ch = b->_loop;
+ ch->_freq += freq;
+ }
+}
+
+//------------------------------in_loop_nest-----------------------------------
+// Determine if block b is in the receiver's loop nest.
+bool CFGLoop::in_loop_nest(Block* b) {
+ int depth = _depth;
+ CFGLoop* b_loop = b->_loop;
+ int b_depth = b_loop->_depth;
+ if (depth == b_depth) {
+ return true;
+ }
+ while (b_depth > depth) {
+ b_loop = b_loop->_parent;
+ b_depth = b_loop->_depth;
+ }
+ return b_loop == this;
+}
+
+//------------------------------scale_freq-------------------------------------
+// Scale frequency of loops and blocks by trip counts from outer loops
+// Do a top down traversal of loop tree (visit outer loops first.)
+void CFGLoop::scale_freq() {
+ float loop_freq = _freq * trip_count();
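+ // For example, a member block with relative frequency 0.5 in a loop entered
+ // with frequency 1 and an estimated trip count of 10 scales to an absolute
+ // frequency of 5.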
+ for (int i = 0; i < _members.length(); i++) {
+ CFGElement* s = _members.at(i);
+ s->_freq *= loop_freq;
+ }
+ CFGLoop* ch = _child;
+ while (ch != NULL) {
+ ch->scale_freq();
+ ch = ch->_sibling;
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump_tree--------------------------------------
+void CFGLoop::dump_tree() const {
+ dump();
+ if (_child != NULL) _child->dump_tree();
+ if (_sibling != NULL) _sibling->dump_tree();
+}
+
+//------------------------------dump-------------------------------------------
+void CFGLoop::dump() const {
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print("%s: %d trip_count: %6.0f freq: %6.0f\n",
+ _depth == 0 ? "Method" : "Loop", _id, trip_count(), _freq);
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print(" members:", _id);
+ int k = 0;
+ for (int i = 0; i < _members.length(); i++) {
+ if (k++ >= 6) {
+ tty->print("\n ");
+ for (int j = 0; j < _depth+1; j++) tty->print(" ");
+ k = 0;
+ }
+ CFGElement *s = _members.at(i);
+ if (s->is_block()) {
+ Block *b = s->as_Block();
+ tty->print(" B%d(%6.3f)", b->_pre_order, b->_freq);
+ } else {
+ CFGLoop* lp = s->as_CFGLoop();
+ tty->print(" L%d(%6.3f)", lp->_id, lp->_freq);
+ }
+ }
+ tty->print("\n");
+ for (int i = 0; i < _depth; i++) tty->print(" ");
+ tty->print(" exits: ");
+ k = 0;
+ for (int i = 0; i < _exits.length(); i++) {
+ if (k++ >= 7) {
+ tty->print("\n ");
+ for (int j = 0; j < _depth+1; j++) tty->print(" ");
+ k = 0;
+ }
+ Block *blk = _exits.at(i).get_target();
+ float prob = _exits.at(i).get_prob();
+ tty->print(" ->%d@%d%%", blk->_pre_order, (int)(prob*100));
+ }
+ tty->print("\n");
+}
+#endif
diff --git a/src/share/vm/opto/generateOptoStub.cpp b/src/share/vm/opto/generateOptoStub.cpp
new file mode 100644
index 000000000..490c4e855
--- /dev/null
+++ b/src/share/vm/opto/generateOptoStub.cpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_generateOptoStub.cpp.incl"
+
+//--------------------gen_stub-------------------------------
+void GraphKit::gen_stub(address C_function,
+ const char *name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool return_pc) {
+ ResourceMark rm;
+
+ const TypeTuple *jdomain = C->tf()->domain();
+ const TypeTuple *jrange = C->tf()->range();
+
+ // The procedure start
+ StartNode* start = new (C, 2) StartNode(root(), jdomain);
+ _gvn.set_type_bottom(start);
+
+ // Make a map, with JVM state
+ uint parm_cnt = jdomain->cnt();
+ uint max_map = MAX2(2*parm_cnt+1, jrange->cnt());
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+ JVMState* jvms = new (C) JVMState(0);
+ jvms->set_bci(InvocationEntryBci);
+ jvms->set_monoff(max_map);
+ jvms->set_endoff(max_map);
+ {
+ SafePointNode *map = new (C, max_map) SafePointNode( max_map, jvms );
+ jvms->set_map(map);
+ set_jvms(jvms);
+ assert(map == this->map(), "kit.map is set");
+ }
+
+ // Make up the parameters
+ uint i;
+ for( i = 0; i < parm_cnt; i++ )
+ map()->init_req(i, _gvn.transform(new (C, 1) ParmNode(start, i)));
+ for( ; i<map()->req(); i++ )
+ map()->init_req(i, top()); // For nicer debugging
+
+ // GraphKit requires memory to be a MergeMemNode:
+ set_all_memory(map()->memory());
+
+ // Get base of thread-local storage area
+ Node* thread = _gvn.transform( new (C, 1) ThreadLocalNode() );
+
+ const int NoAlias = Compile::AliasIdxBot;
+
+ Node* adr_last_Java_pc = basic_plus_adr(top(),
+ thread,
+ in_bytes(JavaThread::frame_anchor_offset()) +
+ in_bytes(JavaFrameAnchor::last_Java_pc_offset()));
+#if defined(SPARC) || defined(IA64)
+ Node* adr_flags = basic_plus_adr(top(),
+ thread,
+ in_bytes(JavaThread::frame_anchor_offset()) +
+ in_bytes(JavaFrameAnchor::flags_offset()));
+#endif /* defined(SPARC) || defined(IA64) */
+
+
+ // Drop in the last_Java_sp. last_Java_fp is not touched.
+ // Always do this after the other "last_Java_frame" fields are set, since
+ // as soon as last_Java_sp != NULL, has_last_Java_frame becomes true and
+ // users will look at the other fields.
+ //
+ Node *adr_sp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_sp_offset()));
+#ifndef IA64
+ Node *last_sp = basic_plus_adr(top(), frameptr(), (intptr_t) STACK_BIAS);
+ store_to_memory(NULL, adr_sp, last_sp, T_ADDRESS, NoAlias);
+#endif
+
+ // Set _thread_in_native
+ // The order of stores into TLS is critical! Setting _thread_in_native MUST
+ // be last, because a GC is allowed at any time after setting it and the GC
+ // will require last_Java_pc and last_Java_sp.
+ Node* adr_state = basic_plus_adr(top(), thread, in_bytes(JavaThread::thread_state_offset()));
+
+ //-----------------------------
+ // Compute signature for C call. Varies from the Java signature!
+ const Type **fields = TypeTuple::fields(2*parm_cnt+2);
+ uint cnt = TypeFunc::Parms;
+ // The C routine gets the base of thread-local storage passed in as an
+ // extra argument. Not all calls need it, but it's cheap to add here.
+ for( ; cnt<parm_cnt; cnt++ )
+ fields[cnt] = jdomain->field_at(cnt);
+ fields[cnt++] = TypeRawPtr::BOTTOM; // Thread-local storage
+ // Also pass in the caller's PC, if asked for.
+ if( return_pc )
+ fields[cnt++] = TypeRawPtr::BOTTOM; // Return PC
+
+ const TypeTuple* domain = TypeTuple::make(cnt,fields);
+ // The C routine we are about to call cannot return an oop; it can block on
+ // exit and a GC will trash the oop while it sits in C-land. Instead, we
+ // return the oop through TLS for runtime calls.
+ // Also, C routines returning integer subword values leave the high
+ // order bits dirty; these must be cleaned up by explicit sign extension.
+ const Type* retval = (jrange->cnt() == TypeFunc::Parms) ? Type::TOP : jrange->field_at(TypeFunc::Parms);
+ // Make a private copy of jrange->fields();
+ const Type **rfields = TypeTuple::fields(jrange->cnt() - TypeFunc::Parms);
+ // Fixup oop returns
+ int retval_ptr = retval->isa_oop_ptr();
+ if( retval_ptr ) {
+ assert( pass_tls, "Oop must be returned thru TLS" );
+ // Fancy-jumps return address; others return void
+ rfields[TypeFunc::Parms] = is_fancy_jump ? TypeRawPtr::BOTTOM : Type::TOP;
+
+ } else if( retval->isa_int() ) { // Returning any integer subtype?
+ // "Fatten" byte, char & short return types to 'int' to show that
+ // the native C code can return values with junk high order bits.
+ // We'll sign-extend it below later.
+ rfields[TypeFunc::Parms] = TypeInt::INT; // It's "dirty" and needs sign-ext
+
+ } else if( jrange->cnt() >= TypeFunc::Parms+1 ) { // Else copy other types
+ rfields[TypeFunc::Parms] = jrange->field_at(TypeFunc::Parms);
+ if( jrange->cnt() == TypeFunc::Parms+2 )
+ rfields[TypeFunc::Parms+1] = jrange->field_at(TypeFunc::Parms+1);
+ }
+ const TypeTuple* range = TypeTuple::make(jrange->cnt(),rfields);
+
+ // Final C signature
+ const TypeFunc *c_sig = TypeFunc::make(domain,range);
+
+ //-----------------------------
+ // Make the call node
+ CallRuntimeNode *call = new (C, c_sig->domain()->cnt())
+ CallRuntimeNode(c_sig, C_function, name, TypePtr::BOTTOM);
+ //-----------------------------
+
+ // Fix-up the debug info for the call
+ call->set_jvms( new (C) JVMState(0) );
+ call->jvms()->set_bci(0);
+ call->jvms()->set_offsets(cnt);
+
+ // Set fixed predefined input arguments
+ cnt = 0;
+ for( i=0; i<TypeFunc::Parms; i++ )
+ call->init_req( cnt++, map()->in(i) );
+ // A little too aggressive on the parm copy; return address is not an input
+ call->set_req(TypeFunc::ReturnAdr, top());
+ for( ; i<parm_cnt; i++ ) // Regular input arguments
+ call->init_req( cnt++, map()->in(i) );
+
+ call->init_req( cnt++, thread );
+ if( return_pc ) // Return PC, if asked for
+ call->init_req( cnt++, returnadr() );
+ _gvn.transform_no_reclaim(call);
+
+
+ //-----------------------------
+ // Now set up the return results
+ set_control( _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Control)) );
+ set_i_o( _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::I_O )) );
+ set_all_memory_call(call);
+ if (range->cnt() > TypeFunc::Parms) {
+ Node* retnode = _gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Parms) );
+ // C-land is allowed to return sub-word values. Convert to integer type.
+ assert( retval != Type::TOP, "" );
+ if (retval == TypeInt::BOOL) {
+ retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0xFF)) );
+ } else if (retval == TypeInt::CHAR) {
+ retnode = _gvn.transform( new (C, 3) AndINode(retnode, intcon(0xFFFF)) );
+ } else if (retval == TypeInt::BYTE) {
+ retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(24)) );
+ retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(24)) );
+ } else if (retval == TypeInt::SHORT) {
+ retnode = _gvn.transform( new (C, 3) LShiftINode(retnode, intcon(16)) );
+ retnode = _gvn.transform( new (C, 3) RShiftINode(retnode, intcon(16)) );
+ }
+ map()->set_req( TypeFunc::Parms, retnode );
+ }
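+
+ // For illustration only: the AndI/LShiftI/RShiftI clean-ups above compute
+ // what a C caller would write by hand for each subword type (a sketch,
+ // relying on RShiftI being an arithmetic shift of the signed 32-bit value):
+ //
+ //   jint clean_bool (jint x) { return x & 0xFF;        }  // zero-extend
+ //   jint clean_char (jint x) { return x & 0xFFFF;      }  // zero-extend
+ //   jint clean_byte (jint x) { return (x << 24) >> 24; }  // sign-extend
+ //   jint clean_short(jint x) { return (x << 16) >> 16; }  // sign-extend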
+
+ //-----------------------------
+
+ // Clear last_Java_sp
+#ifdef IA64
+ if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
+#endif
+
+ store_to_memory(NULL, adr_sp, null(), T_ADDRESS, NoAlias);
+#ifdef IA64
+ if (os::is_MP() && UseMembar) insert_mem_bar(new MemBarVolatileNode());
+#endif // def IA64
+ // Clear last_Java_pc and (optionally)_flags
+ store_to_memory(NULL, adr_last_Java_pc, null(), T_ADDRESS, NoAlias);
+#if defined(SPARC) || defined(IA64)
+ store_to_memory(NULL, adr_flags, intcon(0), T_INT, NoAlias);
+#endif /* defined(SPARC) || defined(IA64) */
+#ifdef IA64
+ Node* adr_last_Java_fp = basic_plus_adr(top(), thread, in_bytes(JavaThread::last_Java_fp_offset()));
+ if( os::is_MP() ) insert_mem_bar(Op_MemBarRelease);
+ store_to_memory(NULL, adr_last_Java_fp, null(), T_ADDRESS, NoAlias);
+#endif
+
+ // For is-fancy-jump, the C-return value is also the branch target
+ Node* target = map()->in(TypeFunc::Parms);
+ // Runtime call returning oop in TLS? Fetch it out
+ if( pass_tls ) {
+ Node* adr = basic_plus_adr(top(), thread, in_bytes(JavaThread::vm_result_offset()));
+ Node* vm_result = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+ map()->set_req(TypeFunc::Parms, vm_result); // vm_result passed as result
+ // clear thread-local-storage(tls)
+ store_to_memory(NULL, adr, null(), T_ADDRESS, NoAlias);
+ }
+
+ //-----------------------------
+ // check exception
+ Node* adr = basic_plus_adr(top(), thread, in_bytes(Thread::pending_exception_offset()));
+ Node* pending = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, NoAlias, false);
+
+ Node* exit_memory = reset_memory();
+
+ Node* cmp = _gvn.transform( new (C, 3) CmpPNode(pending, null()) );
+ Node* bo = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ IfNode *iff = create_and_map_if(control(), bo, PROB_MIN, COUNT_UNKNOWN);
+
+ Node* if_null = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ Node* if_not_null = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+
+ assert (StubRoutines::forward_exception_entry() != NULL, "must be generated before");
+ Node *exc_target = makecon(TypeRawPtr::make( StubRoutines::forward_exception_entry() ));
+ Node *to_exc = new (C, TypeFunc::Parms+2) TailCallNode(if_not_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr(),
+ exc_target, null());
+ root()->add_req(_gvn.transform(to_exc)); // bind to root to keep live
+ C->init_start(start);
+
+ //-----------------------------
+ // If this is a normal subroutine return, issue the return and be done.
+ Node *ret;
+ switch( is_fancy_jump ) {
+ case 0: // Make a return instruction
+ // Return to caller, free any space for return address
+ ret = new (C, TypeFunc::Parms) ReturnNode(TypeFunc::Parms, if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr());
+ if (C->tf()->range()->cnt() > TypeFunc::Parms)
+ ret->add_req( map()->in(TypeFunc::Parms) );
+ break;
+ case 1: // This is a fancy tail-call jump. Jump to computed address.
+ // Jump to new callee; leave old return address alone.
+ ret = new (C, TypeFunc::Parms+2) TailCallNode(if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ returnadr(),
+ target, map()->in(TypeFunc::Parms));
+ break;
+ case 2: // Pop return address & jump
+ // Throw away old return address; jump to new computed address
+ //assert(C_function == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C), "fancy_jump==2 only for rethrow");
+ ret = new (C, TypeFunc::Parms+2) TailJumpNode(if_null,
+ i_o(),
+ exit_memory,
+ frameptr(),
+ target, map()->in(TypeFunc::Parms));
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ root()->add_req(_gvn.transform(ret));
+}
diff --git a/src/share/vm/opto/graphKit.cpp b/src/share/vm/opto/graphKit.cpp
new file mode 100644
index 000000000..8df5f4272
--- /dev/null
+++ b/src/share/vm/opto/graphKit.cpp
@@ -0,0 +1,3146 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_graphKit.cpp.incl"
+
+//----------------------------GraphKit-----------------------------------------
+// Main utility constructor.
+GraphKit::GraphKit(JVMState* jvms)
+ : Phase(Phase::Parser),
+ _env(C->env()),
+ _gvn(*C->initial_gvn())
+{
+ _exceptions = jvms->map()->next_exception();
+ if (_exceptions != NULL) jvms->map()->set_next_exception(NULL);
+ set_jvms(jvms);
+}
+
+// Private constructor for parser.
+GraphKit::GraphKit()
+ : Phase(Phase::Parser),
+ _env(C->env()),
+ _gvn(*C->initial_gvn())
+{
+ _exceptions = NULL;
+ set_map(NULL);
+ debug_only(_sp = -99);
+ debug_only(set_bci(-99));
+}
+
+
+
+//---------------------------clean_stack---------------------------------------
+// Clear away rubbish from the stack area of the JVM state.
+// This destroys any arguments that may be waiting on the stack.
+void GraphKit::clean_stack(int from_sp) {
+ SafePointNode* map = this->map();
+ JVMState* jvms = this->jvms();
+ int stk_size = jvms->stk_size();
+ int stkoff = jvms->stkoff();
+ Node* top = this->top();
+ for (int i = from_sp; i < stk_size; i++) {
+ if (map->in(stkoff + i) != top) {
+ map->set_req(stkoff + i, top);
+ }
+ }
+}
+
+
+//--------------------------------sync_jvms-----------------------------------
+// Make sure our current jvms agrees with our parse state.
+JVMState* GraphKit::sync_jvms() const {
+ JVMState* jvms = this->jvms();
+ jvms->set_bci(bci()); // Record the new bci in the JVMState
+ jvms->set_sp(sp()); // Record the new sp in the JVMState
+ assert(jvms_in_sync(), "jvms is now in sync");
+ return jvms;
+}
+
+#ifdef ASSERT
+bool GraphKit::jvms_in_sync() const {
+ Parse* parse = is_Parse();
+ if (parse == NULL) {
+ if (bci() != jvms()->bci()) return false;
+ if (sp() != (int)jvms()->sp()) return false;
+ return true;
+ }
+ if (jvms()->method() != parse->method()) return false;
+ if (jvms()->bci() != parse->bci()) return false;
+ int jvms_sp = jvms()->sp();
+ if (jvms_sp != parse->sp()) return false;
+ int jvms_depth = jvms()->depth();
+ if (jvms_depth != parse->depth()) return false;
+ return true;
+}
+
+// Local helper checks for special internal merge points
+// used to accumulate and merge exception states.
+// They are marked by the region's in(0) edge being the root node.
+// Such merge points must never "escape" into the parser at large,
+// until they have been handed to gvn.transform.
+static bool is_hidden_merge(Node* reg) {
+ if (reg == NULL) return false;
+ if (reg->is_Phi()) {
+ reg = reg->in(0);
+ if (reg == NULL) return false;
+ }
+ return reg->is_Region() && reg->in(0) != NULL && reg->in(0)->is_Root();
+}
+
+void GraphKit::verify_map() const {
+ if (map() == NULL) return; // null map is OK
+ assert(map()->req() <= jvms()->endoff(), "no extra garbage on map");
+ assert(!map()->has_exceptions(), "call add_exception_states_from 1st");
+ assert(!is_hidden_merge(control()), "call use_exception_state, not set_map");
+}
+
+void GraphKit::verify_exception_state(SafePointNode* ex_map) {
+ assert(ex_map->next_exception() == NULL, "not already part of a chain");
+ assert(has_saved_ex_oop(ex_map), "every exception state has an ex_oop");
+}
+#endif
+
+//---------------------------stop_and_kill_map---------------------------------
+// Set _map to NULL, signalling a stop to further bytecode execution.
+// First smash the current map's control to a constant, to mark it dead.
+void GraphKit::stop_and_kill_map() {
+ SafePointNode* dead_map = stop();
+ if (dead_map != NULL) {
+ dead_map->disconnect_inputs(NULL); // Mark the map as killed.
+ assert(dead_map->is_killed(), "must be so marked");
+ }
+}
+
+
+//--------------------------------stopped--------------------------------------
+// Tell if _map is NULL, or control is top.
+bool GraphKit::stopped() {
+ if (map() == NULL) return true;
+ else if (control() == top()) return true;
+ else return false;
+}
+
+
+//-----------------------------has_ex_handler----------------------------------
+// Tell if this method or any caller method has exception handlers.
+bool GraphKit::has_ex_handler() {
+ for (JVMState* jvmsp = jvms(); jvmsp != NULL; jvmsp = jvmsp->caller()) {
+ if (jvmsp->has_method() && jvmsp->method()->has_exception_handlers()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------save_ex_oop------------------------------------
+// Save an exception without blowing stack contents or other JVM state.
+void GraphKit::set_saved_ex_oop(SafePointNode* ex_map, Node* ex_oop) {
+ assert(!has_saved_ex_oop(ex_map), "clear ex-oop before setting again");
+ ex_map->add_req(ex_oop);
+ debug_only(verify_exception_state(ex_map));
+}
+
+inline static Node* common_saved_ex_oop(SafePointNode* ex_map, bool clear_it) {
+ assert(GraphKit::has_saved_ex_oop(ex_map), "ex_oop must be there");
+ Node* ex_oop = ex_map->in(ex_map->req()-1);
+ if (clear_it) ex_map->del_req(ex_map->req()-1);
+ return ex_oop;
+}
+
+//-----------------------------saved_ex_oop------------------------------------
+// Recover a saved exception from its map.
+Node* GraphKit::saved_ex_oop(SafePointNode* ex_map) {
+ return common_saved_ex_oop(ex_map, false);
+}
+
+//--------------------------clear_saved_ex_oop---------------------------------
+// Erase a previously saved exception from its map.
+Node* GraphKit::clear_saved_ex_oop(SafePointNode* ex_map) {
+ return common_saved_ex_oop(ex_map, true);
+}
+
+#ifdef ASSERT
+//---------------------------has_saved_ex_oop----------------------------------
+// Tell whether an exception oop has been saved in this map.
+bool GraphKit::has_saved_ex_oop(SafePointNode* ex_map) {
+ return ex_map->req() == ex_map->jvms()->endoff()+1;
+}
+#endif
+
+//-------------------------make_exception_state--------------------------------
+// Turn the current JVM state into an exception state, appending the ex_oop.
+SafePointNode* GraphKit::make_exception_state(Node* ex_oop) {
+ sync_jvms();
+ SafePointNode* ex_map = stop(); // do not manipulate this map any more
+ set_saved_ex_oop(ex_map, ex_oop);
+ return ex_map;
+}
+
+
+//--------------------------add_exception_state--------------------------------
+// Add an exception to my list of exceptions.
+void GraphKit::add_exception_state(SafePointNode* ex_map) {
+ if (ex_map == NULL || ex_map->control() == top()) {
+ return;
+ }
+#ifdef ASSERT
+ verify_exception_state(ex_map);
+ if (has_exceptions()) {
+ assert(ex_map->jvms()->same_calls_as(_exceptions->jvms()), "all collected exceptions must come from the same place");
+ }
+#endif
+
+ // If there is already an exception of exactly this type, merge with it.
+ // In particular, null-checks and other low-level exceptions common up here.
+ Node* ex_oop = saved_ex_oop(ex_map);
+ const Type* ex_type = _gvn.type(ex_oop);
+ if (ex_oop == top()) {
+ // No action needed.
+ return;
+ }
+ assert(ex_type->isa_instptr(), "exception must be an instance");
+ for (SafePointNode* e2 = _exceptions; e2 != NULL; e2 = e2->next_exception()) {
+ const Type* ex_type2 = _gvn.type(saved_ex_oop(e2));
+ // We check sp also because call bytecodes can generate exceptions
+ // both before and after arguments are popped!
+ if (ex_type2 == ex_type
+ && e2->_jvms->sp() == ex_map->_jvms->sp()) {
+ combine_exception_states(ex_map, e2);
+ return;
+ }
+ }
+
+ // No pre-existing exception of the same type. Chain it on the list.
+ push_exception_state(ex_map);
+}
+
+//-----------------------add_exception_states_from-----------------------------
+void GraphKit::add_exception_states_from(JVMState* jvms) {
+ SafePointNode* ex_map = jvms->map()->next_exception();
+ if (ex_map != NULL) {
+ jvms->map()->set_next_exception(NULL);
+ for (SafePointNode* next_map; ex_map != NULL; ex_map = next_map) {
+ next_map = ex_map->next_exception();
+ ex_map->set_next_exception(NULL);
+ add_exception_state(ex_map);
+ }
+ }
+}
+
+//-----------------------transfer_exceptions_into_jvms-------------------------
+JVMState* GraphKit::transfer_exceptions_into_jvms() {
+ if (map() == NULL) {
+ // We need a JVMS to carry the exceptions, but the map has gone away.
+ // Create a scratch JVMS, cloned from any of the exception states...
+ if (has_exceptions()) {
+ _map = _exceptions;
+ _map = clone_map();
+ _map->set_next_exception(NULL);
+ clear_saved_ex_oop(_map);
+ debug_only(verify_map());
+ } else {
+ // ...or created from scratch
+ JVMState* jvms = new (C) JVMState(_method, NULL);
+ jvms->set_bci(_bci);
+ jvms->set_sp(_sp);
+ jvms->set_map(new (C, TypeFunc::Parms) SafePointNode(TypeFunc::Parms, jvms));
+ set_jvms(jvms);
+ for (uint i = 0; i < map()->req(); i++) map()->init_req(i, top());
+ set_all_memory(top());
+ while (map()->req() < jvms->endoff()) map()->add_req(top());
+ }
+ // (This is a kludge, in case you didn't notice.)
+ set_control(top());
+ }
+ JVMState* jvms = sync_jvms();
+ assert(!jvms->map()->has_exceptions(), "no exceptions on this map yet");
+ jvms->map()->set_next_exception(_exceptions);
+ _exceptions = NULL; // done with this set of exceptions
+ return jvms;
+}
+
+static inline void add_n_reqs(Node* dstphi, Node* srcphi) {
+ assert(is_hidden_merge(dstphi), "must be a special merge node");
+ assert(is_hidden_merge(srcphi), "must be a special merge node");
+ uint limit = srcphi->req();
+ for (uint i = PhiNode::Input; i < limit; i++) {
+ dstphi->add_req(srcphi->in(i));
+ }
+}
+static inline void add_one_req(Node* dstphi, Node* src) {
+ assert(is_hidden_merge(dstphi), "must be a special merge node");
+ assert(!is_hidden_merge(src), "must not be a special merge node");
+ dstphi->add_req(src);
+}
+
+//-----------------------combine_exception_states------------------------------
+// This helper function combines exception states by building phis on a
+// specially marked state-merging region. These regions and phis are
+// untransformed, and can build up gradually. The region is marked by
+// having a control input of its exception map, rather than NULL. Such
+// regions do not appear except in this function, and in use_exception_state.
+void GraphKit::combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map) {
+ if (failing()) return; // dying anyway...
+ JVMState* ex_jvms = ex_map->_jvms;
+ assert(ex_jvms->same_calls_as(phi_map->_jvms), "consistent call chains");
+ assert(ex_jvms->stkoff() == phi_map->_jvms->stkoff(), "matching locals");
+ assert(ex_jvms->sp() == phi_map->_jvms->sp(), "matching stack sizes");
+ assert(ex_jvms->monoff() == phi_map->_jvms->monoff(), "matching JVMS");
+ assert(ex_map->req() == phi_map->req(), "matching maps");
+ uint tos = ex_jvms->stkoff() + ex_jvms->sp();
+ Node* hidden_merge_mark = root();
+ Node* region = phi_map->control();
+ MergeMemNode* phi_mem = phi_map->merged_memory();
+ MergeMemNode* ex_mem = ex_map->merged_memory();
+ if (region->in(0) != hidden_merge_mark) {
+ // The control input is not (yet) a specially-marked region in phi_map.
+ // Make it so, and build some phis.
+ region = new (C, 2) RegionNode(2);
+ _gvn.set_type(region, Type::CONTROL);
+ region->set_req(0, hidden_merge_mark); // marks an internal ex-state
+ region->init_req(1, phi_map->control());
+ phi_map->set_control(region);
+ Node* io_phi = PhiNode::make(region, phi_map->i_o(), Type::ABIO);
+ record_for_igvn(io_phi);
+ _gvn.set_type(io_phi, Type::ABIO);
+ phi_map->set_i_o(io_phi);
+ for (MergeMemStream mms(phi_mem); mms.next_non_empty(); ) {
+ Node* m = mms.memory();
+ Node* m_phi = PhiNode::make(region, m, Type::MEMORY, mms.adr_type(C));
+ record_for_igvn(m_phi);
+ _gvn.set_type(m_phi, Type::MEMORY);
+ mms.set_memory(m_phi);
+ }
+ }
+
+ // Either or both of phi_map and ex_map might already be converted into phis.
+ Node* ex_control = ex_map->control();
+ // if there is special marking on ex_map also, we add multiple edges from src
+ bool add_multiple = (ex_control->in(0) == hidden_merge_mark);
+ // how wide was the destination phi_map, originally?
+ uint orig_width = region->req();
+
+ if (add_multiple) {
+ add_n_reqs(region, ex_control);
+ add_n_reqs(phi_map->i_o(), ex_map->i_o());
+ } else {
+ // ex_map has no merges, so we just add single edges everywhere
+ add_one_req(region, ex_control);
+ add_one_req(phi_map->i_o(), ex_map->i_o());
+ }
+ for (MergeMemStream mms(phi_mem, ex_mem); mms.next_non_empty2(); ) {
+ if (mms.is_empty()) {
+ // get a copy of the base memory, and patch some inputs into it
+ const TypePtr* adr_type = mms.adr_type(C);
+ Node* phi = mms.force_memory()->as_Phi()->slice_memory(adr_type);
+ assert(phi->as_Phi()->region() == mms.base_memory()->in(0), "");
+ mms.set_memory(phi);
+ // Prepare to append interesting stuff onto the newly sliced phi:
+ while (phi->req() > orig_width) phi->del_req(phi->req()-1);
+ }
+ // Append stuff from ex_map:
+ if (add_multiple) {
+ add_n_reqs(mms.memory(), mms.memory2());
+ } else {
+ add_one_req(mms.memory(), mms.memory2());
+ }
+ }
+ uint limit = ex_map->req();
+ for (uint i = TypeFunc::Parms; i < limit; i++) {
+ // Skip everything in the JVMS after tos. (The ex_oop follows.)
+ if (i == tos) i = ex_jvms->monoff();
+ Node* src = ex_map->in(i);
+ Node* dst = phi_map->in(i);
+ if (src != dst) {
+ PhiNode* phi;
+ if (dst->in(0) != region) {
+ dst = phi = PhiNode::make(region, dst, _gvn.type(dst));
+ record_for_igvn(phi);
+ _gvn.set_type(phi, phi->type());
+ phi_map->set_req(i, dst);
+ // Prepare to append interesting stuff onto the new phi:
+ while (dst->req() > orig_width) dst->del_req(dst->req()-1);
+ } else {
+ assert(dst->is_Phi(), "nobody else uses a hidden region");
+ phi = (PhiNode*)dst;
+ }
+ if (add_multiple && src->in(0) == ex_control) {
+ // Both are phis.
+ add_n_reqs(dst, src);
+ } else {
+ while (dst->req() < region->req()) add_one_req(dst, src);
+ }
+ const Type* srctype = _gvn.type(src);
+ if (phi->type() != srctype) {
+ const Type* dsttype = phi->type()->meet(srctype);
+ if (phi->type() != dsttype) {
+ phi->set_type(dsttype);
+ _gvn.set_type(phi, dsttype);
+ }
+ }
+ }
+ }
+}
+
+//--------------------------use_exception_state--------------------------------
+Node* GraphKit::use_exception_state(SafePointNode* phi_map) {
+ if (failing()) { stop(); return top(); }
+ Node* region = phi_map->control();
+ Node* hidden_merge_mark = root();
+ assert(phi_map->jvms()->map() == phi_map, "sanity: 1-1 relation");
+ Node* ex_oop = clear_saved_ex_oop(phi_map);
+ if (region->in(0) == hidden_merge_mark) {
+ // Special marking for internal ex-states. Process the phis now.
+ region->set_req(0, region); // now it's an ordinary region
+ set_jvms(phi_map->jvms()); // ...so now we can use it as a map
+ // Note: Setting the jvms also sets the bci and sp.
+ set_control(_gvn.transform(region));
+ uint tos = jvms()->stkoff() + sp();
+ for (uint i = 1; i < tos; i++) {
+ Node* x = phi_map->in(i);
+ if (x->in(0) == region) {
+ assert(x->is_Phi(), "expected a special phi");
+ phi_map->set_req(i, _gvn.transform(x));
+ }
+ }
+ for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
+ Node* x = mms.memory();
+ if (x->in(0) == region) {
+ assert(x->is_Phi(), "nobody else uses a hidden region");
+ mms.set_memory(_gvn.transform(x));
+ }
+ }
+ if (ex_oop->in(0) == region) {
+ assert(ex_oop->is_Phi(), "expected a special phi");
+ ex_oop = _gvn.transform(ex_oop);
+ }
+ } else {
+ set_jvms(phi_map->jvms());
+ }
+
+ assert(!is_hidden_merge(phi_map->control()), "hidden ex. states cleared");
+ assert(!is_hidden_merge(phi_map->i_o()), "hidden ex. states cleared");
+ return ex_oop;
+}
+
+//---------------------------------java_bc-------------------------------------
+Bytecodes::Code GraphKit::java_bc() const {
+ ciMethod* method = this->method();
+ int bci = this->bci();
+ if (method != NULL && bci != InvocationEntryBci)
+ return method->java_code_at_bci(bci);
+ else
+ return Bytecodes::_illegal;
+}
+
+//------------------------------builtin_throw----------------------------------
+void GraphKit::builtin_throw(Deoptimization::DeoptReason reason, Node* arg) {
+ bool must_throw = true;
+
+ if (JvmtiExport::can_post_exceptions()) {
+ // Do not try anything fancy if we're notifying the VM on every throw.
+ // Cf. case Bytecodes::_athrow in parse2.cpp.
+ uncommon_trap(reason, Deoptimization::Action_none,
+ (ciKlass*)NULL, (char*)NULL, must_throw);
+ return;
+ }
+
+ // If this particular condition has not yet happened at this
+ // bytecode, then use the uncommon trap mechanism, and allow for
+ // a future recompilation if several traps occur here.
+ // If the throw is hot, try to use a more complicated inline mechanism
+ // which keeps execution inside the compiled code.
+ bool treat_throw_as_hot = false;
+ ciMethodData* md = method()->method_data();
+
+ if (ProfileTraps) {
+ if (too_many_traps(reason)) {
+ treat_throw_as_hot = true;
+ }
+ // (If there is no MDO at all, assume it is early in
+ // execution, and that any deopts are part of the
+ // startup transient, and don't need to be remembered.)
+
+ // Also, if there is a local exception handler, treat all throws
+ // as hot if there has been at least one in this method.
+ if (C->trap_count(reason) != 0
+ && method()->method_data()->trap_count(reason) != 0
+ && has_ex_handler()) {
+ treat_throw_as_hot = true;
+ }
+ }
+
+ // If this throw happens frequently, an uncommon trap might cause
+ // a performance pothole. If there is a local exception handler,
+ // and if this particular bytecode appears to be deoptimizing often,
+ // let us handle the throw inline, with a preconstructed instance.
+ // Note: If the deopt count has blown up, the uncommon trap
+ // runtime is going to flush this nmethod, no matter what.
+ if (treat_throw_as_hot
+ && (!StackTraceInThrowable || OmitStackTraceInFastThrow)) {
+ // If the throw is local, we use a pre-existing instance and
+ // punt on the backtrace. This would lead to a missing backtrace
+ // (a repeat of 4292742) if the backtrace object is ever asked
+ // for its backtrace.
+ // Fixing this remaining case of 4292742 requires some flavor of
+ // escape analysis. Leave that for the future.
+ ciInstance* ex_obj = NULL;
+ switch (reason) {
+ case Deoptimization::Reason_null_check:
+ ex_obj = env()->NullPointerException_instance();
+ break;
+ case Deoptimization::Reason_div0_check:
+ ex_obj = env()->ArithmeticException_instance();
+ break;
+ case Deoptimization::Reason_range_check:
+ ex_obj = env()->ArrayIndexOutOfBoundsException_instance();
+ break;
+ case Deoptimization::Reason_class_check:
+ if (java_bc() == Bytecodes::_aastore) {
+ ex_obj = env()->ArrayStoreException_instance();
+ } else {
+ ex_obj = env()->ClassCastException_instance();
+ }
+ break;
+ }
+ if (failing()) { stop(); return; } // exception allocation might fail
+ if (ex_obj != NULL) {
+ // Cheat with a preallocated exception object.
+ if (C->log() != NULL)
+ C->log()->elem("hot_throw preallocated='1' reason='%s'",
+ Deoptimization::trap_reason_name(reason));
+ const TypeInstPtr* ex_con = TypeInstPtr::make(ex_obj);
+ Node* ex_node = _gvn.transform(new (C, 1) ConPNode(ex_con));
+
+ // Clear the detail message of the preallocated exception object.
+ // Weblogic sometimes mutates the detail message of exceptions
+ // using reflection.
+ int offset = java_lang_Throwable::get_detailMessage_offset();
+ const TypePtr* adr_typ = ex_con->add_offset(offset);
+
+ Node *adr = basic_plus_adr(ex_node, ex_node, offset);
+ Node *store = store_oop_to_object(control(), ex_node, adr, adr_typ, null(), ex_con, T_OBJECT);
+
+ add_exception_state(make_exception_state(ex_node));
+ return;
+ }
+ }
+
+ // %%% Maybe add entry to OptoRuntime which directly throws the exc.?
+ // It won't be much cheaper than bailing to the interp., since we'll
+ // have to pass up all the debug-info, and the runtime will have to
+ // create the stack trace.
+
+ // Usual case: Bail to interpreter.
+ // Reserve the right to recompile if we haven't seen anything yet.
+
+ Deoptimization::DeoptAction action = Deoptimization::Action_maybe_recompile;
+ if (treat_throw_as_hot
+ && (method()->method_data()->trap_recompiled_at(bci())
+ || C->too_many_traps(reason))) {
+ // We cannot afford to take more traps here. Suffer in the interpreter.
+ if (C->log() != NULL)
+ C->log()->elem("hot_throw preallocated='0' reason='%s' mcount='%d'",
+ Deoptimization::trap_reason_name(reason),
+ C->trap_count(reason));
+ action = Deoptimization::Action_none;
+ }
+
+ // "must_throw" prunes the JVM state to include only the stack, if there
+ // are no local exception handlers. This should cut down on register
+ // allocation time and code size, by drastically reducing the number
+ // of in-edges on the call to the uncommon trap.
+
+ uncommon_trap(reason, action, (ciKlass*)NULL, (char*)NULL, must_throw);
+}
+
+
+//----------------------------PreserveJVMState---------------------------------
+PreserveJVMState::PreserveJVMState(GraphKit* kit, bool clone_map) {
+ debug_only(kit->verify_map());
+ _kit = kit;
+ _map = kit->map(); // preserve the map
+ _sp = kit->sp();
+ kit->set_map(clone_map ? kit->clone_map() : NULL);
+#ifdef ASSERT
+ _bci = kit->bci();
+ Parse* parser = kit->is_Parse();
+ int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+ _block = block;
+#endif
+}
+PreserveJVMState::~PreserveJVMState() {
+ GraphKit* kit = _kit;
+#ifdef ASSERT
+ assert(kit->bci() == _bci, "bci must not shift");
+ Parse* parser = kit->is_Parse();
+ int block = (parser == NULL || parser->block() == NULL) ? -1 : parser->block()->pre_order();
+ assert(block == _block, "block must not shift");
+#endif
+ kit->set_map(_map);
+ kit->set_sp(_sp);
+}
+
+
+//-----------------------------BuildCutout-------------------------------------
+BuildCutout::BuildCutout(GraphKit* kit, Node* p, float prob, float cnt)
+ : PreserveJVMState(kit)
+{
+ assert(p->is_Con() || p->is_Bool(), "test must be a bool");
+ SafePointNode* outer_map = _map; // preserved map is caller's
+ SafePointNode* inner_map = kit->map();
+ IfNode* iff = kit->create_and_map_if(outer_map->control(), p, prob, cnt);
+ outer_map->set_control(kit->gvn().transform( new (kit->C, 1) IfTrueNode(iff) ));
+ inner_map->set_control(kit->gvn().transform( new (kit->C, 1) IfFalseNode(iff) ));
+}
+BuildCutout::~BuildCutout() {
+ GraphKit* kit = _kit;
+ assert(kit->stopped(), "cutout code must stop, throw, return, etc.");
+}
+
+
+//------------------------------clone_map--------------------------------------
+// Implementation of PreserveJVMState
+//
+// Only clone_map(...) lives here. If this function ends up being used only by
+// the PreserveJVMState class, we may want to get rid of this extra function
+// eventually and do it all there.
+
+SafePointNode* GraphKit::clone_map() {
+ if (map() == NULL) return NULL;
+
+ // Clone the memory edge first
+ Node* mem = MergeMemNode::make(C, map()->memory());
+ gvn().set_type_bottom(mem);
+
+ SafePointNode *clonemap = (SafePointNode*)map()->clone();
+ JVMState* jvms = this->jvms();
+ JVMState* clonejvms = jvms->clone_shallow(C);
+ clonemap->set_memory(mem);
+ clonemap->set_jvms(clonejvms);
+ clonejvms->set_map(clonemap);
+ record_for_igvn(clonemap);
+ gvn().set_type_bottom(clonemap);
+ return clonemap;
+}
+
+
+//-----------------------------set_map_clone-----------------------------------
+void GraphKit::set_map_clone(SafePointNode* m) {
+ _map = m;
+ _map = clone_map();
+ _map->set_next_exception(NULL);
+ debug_only(verify_map());
+}
+
+
+//----------------------------kill_dead_locals---------------------------------
+// Detect any locals which are known to be dead, and force them to top.
+void GraphKit::kill_dead_locals() {
+ // Consult the liveness information for the locals. If any
+ // of them are unused, then they can be replaced by top(). This
+ // should help register allocation time and cut down on the size
+ // of the deoptimization information.
+
+ // This call is made from many of the bytecode handling
+ // subroutines called from the Big Switch in do_one_bytecode.
+ // Every bytecode which might include a slow path is responsible
+ // for killing its dead locals. The more consistent we
+ // are about killing deads, the fewer useless phis will be
+ // constructed for them at various merge points.
+
+ // bci can be -1 (InvocationEntryBci), in which case we consult the entry
+ // liveness for the method.
+
+ if (method() == NULL || method()->code_size() == 0) {
+ // We are building a graph for a call to a native method.
+ // All locals are live.
+ return;
+ }
+
+ ResourceMark rm;
+
+ MethodLivenessResult live_locals = method()->liveness_at_bci(bci());
+
+ int len = (int)live_locals.size();
+ assert(len <= jvms()->loc_size(), "too many live locals");
+ for (int local = 0; local < len; local++) {
+ if (!live_locals.at(local)) {
+ set_local(local, top());
+ }
+ }
+}
+
+#ifdef ASSERT
+//-------------------------dead_locals_are_killed------------------------------
+// Return true if all dead locals are set to top in the map.
+// Used to assert "clean" debug info at various points.
+bool GraphKit::dead_locals_are_killed() {
+ if (method() == NULL || method()->code_size() == 0) {
+ // No locals need to be dead, so all is as it should be.
+ return true;
+ }
+
+ // Make sure somebody called kill_dead_locals upstream.
+ ResourceMark rm;
+ for (JVMState* jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ if (jvms->loc_size() == 0) continue; // no locals to consult
+ SafePointNode* map = jvms->map();
+ ciMethod* method = jvms->method();
+ int bci = jvms->bci();
+ if (jvms == this->jvms()) {
+ bci = this->bci(); // it might not yet be synched
+ }
+ MethodLivenessResult live_locals = method->liveness_at_bci(bci);
+ int len = (int)live_locals.size();
+ if (!live_locals.is_valid() || len == 0)
+ // This method is trivial, or is poisoned by a breakpoint.
+ return true;
+ assert(len == jvms->loc_size(), "live map consistent with locals map");
+ for (int local = 0; local < len; local++) {
+ if (!live_locals.at(local) && map->local(jvms, local) != top()) {
+ if (PrintMiscellaneous && (Verbose || WizardMode)) {
+ tty->print_cr("Zombie local %d: ", local);
+ jvms->dump();
+ }
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+#endif //ASSERT
+
+// Helper function for adding JVMState and debug information to node
+void GraphKit::add_safepoint_edges(SafePointNode* call, bool must_throw) {
+ // Add the safepoint edges to the call (or other safepoint).
+
+ // Make sure dead locals are set to top. This
+ // should help register allocation time and cut down on the size
+ // of the deoptimization information.
+ assert(dead_locals_are_killed(), "garbage in debug info before safepoint");
+
+ // Walk the inline list to fill in the correct set of JVMState's
+ // Also fill in the associated edges for each JVMState.
+
+ JVMState* youngest_jvms = sync_jvms();
+
+ // Do we need debug info here? If it is a SafePoint and this method
+ // cannot de-opt, then we do NOT need any debug info.
+ bool full_info = (C->deopt_happens() || call->Opcode() != Op_SafePoint);
+
+ // If we are guaranteed to throw, we can prune everything but the
+ // input to the current bytecode.
+ bool can_prune_locals = false;
+ uint stack_slots_not_pruned = 0;
+ int inputs = 0, depth = 0;
+ if (must_throw) {
+ assert(method() == youngest_jvms->method(), "sanity");
+ if (compute_stack_effects(inputs, depth)) {
+ can_prune_locals = true;
+ stack_slots_not_pruned = inputs;
+ }
+ }
+
+ if (JvmtiExport::can_examine_or_deopt_anywhere()) {
+ // At any safepoint, this method can get breakpointed, which would
+ // then require an immediate deoptimization.
+ full_info = true;
+ can_prune_locals = false; // do not prune locals
+ stack_slots_not_pruned = 0;
+ }
+
+ // do not scribble on the input jvms
+ JVMState* out_jvms = youngest_jvms->clone_deep(C);
+ call->set_jvms(out_jvms); // Start jvms list for call node
+
+ // Presize the call:
+ debug_only(uint non_debug_edges = call->req());
+ call->add_req_batch(top(), youngest_jvms->debug_depth());
+ assert(call->req() == non_debug_edges + youngest_jvms->debug_depth(), "");
+
+ // Set up edges so that the call looks like this:
+ // Call [state:] ctl io mem fptr retadr
+ // [parms:] parm0 ... parmN
+ // [root:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+ // [...mid:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN [...]
+ // [young:] loc0 ... locN stk0 ... stkSP mon0 obj0 ... monN objN
+ // Note that caller debug info precedes callee debug info.
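+ // For example (illustrative numbers): a single, non-inlined JVMState with
+ // 3 locals, sp == 2 and one monitor (box plus object, as diagrammed above)
+ // contributes 3 + 2 + 2 = 7 debug edges beyond the fixed inputs.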
+
+ // Fill pointer walks backwards from "young:" to "root:" in the diagram above:
+ uint debug_ptr = call->req();
+
+ // Loop over the map input edges associated with jvms, add them
+ // to the call node, & reset all offsets to match call node array.
+ for (JVMState* in_jvms = youngest_jvms; in_jvms != NULL; ) {
+ uint debug_end = debug_ptr;
+ uint debug_start = debug_ptr - in_jvms->debug_size();
+ debug_ptr = debug_start; // back up the ptr
+
+ uint p = debug_start; // walks forward in [debug_start, debug_end)
+ uint j, k, l;
+ SafePointNode* in_map = in_jvms->map();
+ out_jvms->set_map(call);
+
+ if (can_prune_locals) {
+ assert(in_jvms->method() == out_jvms->method(), "sanity");
+ // If the current throw can reach an exception handler in this JVMS,
+ // then we must keep everything live that can reach that handler.
+ // As a quick and dirty approximation, we look for any handlers at all.
+ if (in_jvms->method()->has_exception_handlers()) {
+ can_prune_locals = false;
+ }
+ }
+
+ // Add the Locals
+ k = in_jvms->locoff();
+ l = in_jvms->loc_size();
+ out_jvms->set_locoff(p);
+ if (full_info && !can_prune_locals) {
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else {
+ p += l; // already set to top above by add_req_batch
+ }
+
+ // Add the Expression Stack
+ k = in_jvms->stkoff();
+ l = in_jvms->sp();
+ out_jvms->set_stkoff(p);
+ if (full_info && !can_prune_locals) {
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else if (can_prune_locals && stack_slots_not_pruned != 0) {
+ // Divide stack into {S0,...,S1}, where S0 is set to top.
+ uint s1 = stack_slots_not_pruned;
+ stack_slots_not_pruned = 0; // for next iteration
+ if (s1 > l) s1 = l;
+ uint s0 = l - s1;
+ p += s0; // skip the tops preinstalled by add_req_batch
+ for (j = s0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+ } else {
+ p += l; // already set to top above by add_req_batch
+ }
+
+ // Add the Monitors
+ k = in_jvms->monoff();
+ l = in_jvms->mon_size();
+ out_jvms->set_monoff(p);
+ for (j = 0; j < l; j++)
+ call->set_req(p++, in_map->in(k+j));
+
+ // Finish the new jvms.
+ out_jvms->set_endoff(p);
+
+ assert(out_jvms->endoff() == debug_end, "fill ptr must match");
+ assert(out_jvms->depth() == in_jvms->depth(), "depth must match");
+ assert(out_jvms->loc_size() == in_jvms->loc_size(), "size must match");
+ assert(out_jvms->mon_size() == in_jvms->mon_size(), "size must match");
+ assert(out_jvms->debug_size() == in_jvms->debug_size(), "size must match");
+
+ // Update the two tail pointers in parallel.
+ out_jvms = out_jvms->caller();
+ in_jvms = in_jvms->caller();
+ }
+
+ assert(debug_ptr == non_debug_edges, "debug info must fit exactly");
+
+ // Test the correctness of JVMState::debug_xxx accessors:
+ assert(call->jvms()->debug_start() == non_debug_edges, "");
+ assert(call->jvms()->debug_end() == call->req(), "");
+ assert(call->jvms()->debug_depth() == call->req() - non_debug_edges, "");
+}
+
+bool GraphKit::compute_stack_effects(int& inputs, int& depth) {
+ Bytecodes::Code code = java_bc();
+ if (code == Bytecodes::_wide) {
+ code = method()->java_code_at_bci(bci() + 1);
+ }
+
+ BasicType rtype = T_ILLEGAL;
+ int rsize = 0;
+
+ if (code != Bytecodes::_illegal) {
+ depth = Bytecodes::depth(code); // checkcast=0, athrow=-1
+ rtype = Bytecodes::result_type(code); // checkcast=P, athrow=V
+ if (rtype < T_CONFLICT)
+ rsize = type2size[rtype];
+ }
+
+ switch (code) {
+ case Bytecodes::_illegal:
+ return false;
+
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ inputs = 0;
+ break;
+
+ case Bytecodes::_dup: inputs = 1; break;
+ case Bytecodes::_dup_x1: inputs = 2; break;
+ case Bytecodes::_dup_x2: inputs = 3; break;
+ case Bytecodes::_dup2: inputs = 2; break;
+ case Bytecodes::_dup2_x1: inputs = 3; break;
+ case Bytecodes::_dup2_x2: inputs = 4; break;
+ case Bytecodes::_swap: inputs = 2; break;
+ case Bytecodes::_arraylength: inputs = 1; break;
+
+ case Bytecodes::_getstatic:
+ case Bytecodes::_putstatic:
+ case Bytecodes::_getfield:
+ case Bytecodes::_putfield:
+ {
+ bool is_get = (depth >= 0), is_static = (depth & 1);
+ bool ignore;
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ ciField* field = iter.get_field(ignore);
+ int size = field->type()->size();
+ inputs = (is_static ? 0 : 1);
+ if (is_get) {
+ depth = size - inputs;
+ } else {
+ inputs += size; // putxxx pops the value from the stack
+ depth = - inputs;
+ }
+ }
+ break;
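+
+ // Worked example for the field cases above (illustrative): getfield of a
+ // long pops the receiver and pushes a two-slot value, so inputs == 1 and
+ // depth == 2 - 1 == 1; putfield of the same field pops receiver plus value,
+ // so inputs == 1 + 2 == 3 and depth == -3.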
+
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokeinterface:
+ {
+ bool is_static = (depth == 0);
+ bool ignore;
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ ciMethod* method = iter.get_method(ignore);
+ inputs = method->arg_size_no_receiver();
+ if (!is_static) inputs += 1;
+ int size = method->return_type()->size();
+ depth = size - inputs;
+ }
+ break;
+
+ case Bytecodes::_multianewarray:
+ {
+ ciBytecodeStream iter(method());
+ iter.reset_to_bci(bci());
+ iter.next();
+ inputs = iter.get_dimensions();
+ assert(rsize == 1, "");
+ depth = rsize - inputs;
+ }
+ break;
+
+ case Bytecodes::_ireturn:
+ case Bytecodes::_lreturn:
+ case Bytecodes::_freturn:
+ case Bytecodes::_dreturn:
+ case Bytecodes::_areturn:
+ assert(rsize == -depth, "");
+ inputs = rsize;
+ break;
+
+ case Bytecodes::_jsr:
+ case Bytecodes::_jsr_w:
+ inputs = 0;
+ depth = 1; // S.B. depth=1, not zero
+ break;
+
+ default:
+ // bytecode produces a typed result
+ inputs = rsize - depth;
+ assert(inputs >= 0, "");
+ break;
+ }
+
+#ifdef ASSERT
+ // spot check
+ int outputs = depth + inputs;
+ assert(outputs >= 0, "sanity");
+ switch (code) {
+ case Bytecodes::_checkcast: assert(inputs == 1 && outputs == 1, ""); break;
+ case Bytecodes::_athrow: assert(inputs == 1 && outputs == 0, ""); break;
+ case Bytecodes::_aload_0: assert(inputs == 0 && outputs == 1, ""); break;
+ case Bytecodes::_return: assert(inputs == 0 && outputs == 0, ""); break;
+ case Bytecodes::_drem: assert(inputs == 4 && outputs == 2, ""); break;
+ }
+#endif //ASSERT
+
+ return true;
+}
+
+
+
+//------------------------------basic_plus_adr---------------------------------
+Node* GraphKit::basic_plus_adr(Node* base, Node* ptr, Node* offset) {
+ // short-circuit a common case
+ if (offset == intcon(0)) return ptr;
+ return _gvn.transform( new (C, 4) AddPNode(base, ptr, offset) );
+}
+
+Node* GraphKit::ConvI2L(Node* offset) {
+ // short-circuit a common case
+ jint offset_con = find_int_con(offset, Type::OffsetBot);
+ if (offset_con != Type::OffsetBot) {
+ return longcon((long) offset_con);
+ }
+ return _gvn.transform( new (C, 2) ConvI2LNode(offset));
+}
+Node* GraphKit::ConvL2I(Node* offset) {
+ // short-circuit a common case
+ jlong offset_con = find_long_con(offset, (jlong)Type::OffsetBot);
+ if (offset_con != (jlong)Type::OffsetBot) {
+ return intcon((int) offset_con);
+ }
+ return _gvn.transform( new (C, 2) ConvL2INode(offset));
+}
+
+//-------------------------load_object_klass-----------------------------------
+Node* GraphKit::load_object_klass(Node* obj) {
+ // Special-case a fresh allocation to avoid building nodes:
+ Node* akls = AllocateNode::Ideal_klass(obj, &_gvn);
+ if (akls != NULL) return akls;
+ Node* k_adr = basic_plus_adr(obj, oopDesc::klass_offset_in_bytes());
+ return _gvn.transform( new (C, 3) LoadKlassNode(0, immutable_memory(), k_adr, TypeInstPtr::KLASS) );
+}
+
+//-------------------------load_array_length-----------------------------------
+Node* GraphKit::load_array_length(Node* array) {
+ // Special-case a fresh allocation to avoid building nodes:
+ Node* alen = AllocateArrayNode::Ideal_length(array, &_gvn);
+ if (alen != NULL) return alen;
+ Node *r_adr = basic_plus_adr(array, arrayOopDesc::length_offset_in_bytes());
+ return _gvn.transform( new (C, 3) LoadRangeNode(0, immutable_memory(), r_adr, TypeInt::POS));
+}
+
+//------------------------------do_null_check----------------------------------
+// Helper function to do a NULL pointer check. The returned value is
+// the incoming address with NULL cast away. You are allowed to use the
+// not-null value only if you are control dependent on the test.
+extern int explicit_null_checks_inserted,
+ explicit_null_checks_elided;
+Node* GraphKit::null_check_common(Node* value, BasicType type,
+ // optional arguments for variations:
+ bool assert_null,
+ Node* *null_control) {
+ assert(!assert_null || null_control == NULL, "not both at once");
+ if (stopped()) return top();
+ if (!GenerateCompilerNullChecks && !assert_null && null_control == NULL) {
+ // For some performance testing, we may wish to suppress null checking.
+ value = cast_not_null(value); // Make it appear to be non-null (4962416).
+ return value;
+ }
+ explicit_null_checks_inserted++;
+
+ // Construct NULL check
+ Node *chk = NULL;
+ switch(type) {
+ case T_LONG : chk = new (C, 3) CmpLNode(value, _gvn.zerocon(T_LONG)); break;
+ case T_INT : chk = new (C, 3) CmpINode( value, _gvn.intcon(0)); break;
+ case T_ARRAY : // fall through
+ type = T_OBJECT; // simplify further tests
+ case T_OBJECT : {
+ const Type *t = _gvn.type( value );
+
+ const TypeInstPtr* tp = t->isa_instptr();
+ if (tp != NULL && !tp->klass()->is_loaded()
+ // Only for do_null_check, not any of its siblings:
+ && !assert_null && null_control == NULL) {
+ // Usually, any field access or invocation on an unloaded oop type
+ // will simply fail to link, since the statically linked class is
+ // likely also to be unloaded. However, in -Xcomp mode, sometimes
+ // the static class is loaded but the sharper oop type is not.
+ // Rather than checking for this obscure case in lots of places,
+ // we simply observe that a null check on an unloaded class
+ // will always be followed by a nonsense operation, so we
+ // can just issue the uncommon trap here.
+ // Our access to the unloaded class will only be correct
+ // after it has been loaded and initialized, which requires
+ // a trip through the interpreter.
+#ifndef PRODUCT
+ if (WizardMode) { tty->print("Null check of unloaded "); tp->klass()->print(); tty->cr(); }
+#endif
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ tp->klass(), "!loaded");
+ return top();
+ }
+
+ if (assert_null) {
+ // See if the type is contained in NULL_PTR.
+ // If so, then the value is already null.
+ if (t->higher_equal(TypePtr::NULL_PTR)) {
+ explicit_null_checks_elided++;
+ return value; // Elided null assert quickly!
+ }
+ } else {
+ // See if mixing in the NULL pointer changes type.
+ // If so, then the NULL pointer was not allowed in the original
+ // type. In other words, "value" was not-null.
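+ // (For example, if t is TypeInstPtr::NOTNULL, the meet re-admits the
+ // null pointer and so differs from t; the value is provably non-null
+ // and the explicit check below is skipped.)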
+ if (t->meet(TypePtr::NULL_PTR) != t) {
+ // same as: if (!TypePtr::NULL_PTR->higher_equal(t)) ...
+ explicit_null_checks_elided++;
+ return value; // Elided null check quickly!
+ }
+ }
+ chk = new (C, 3) CmpPNode( value, null() );
+ break;
+ }
+
+ default : ShouldNotReachHere();
+ }
+ assert(chk != NULL, "sanity check");
+ chk = _gvn.transform(chk);
+
+ BoolTest::mask btest = assert_null ? BoolTest::eq : BoolTest::ne;
+ BoolNode *btst = new (C, 2) BoolNode( chk, btest);
+ Node *tst = _gvn.transform( btst );
+
+ //-----------
+ // If peephole optimizations occurred, a prior test existed.
+ // If a prior test existed, maybe it dominates and we can avoid this test.
+ if (tst != btst && type == T_OBJECT) {
+ // At this point we want to scan up the CFG to see if we can
+ // find an identical test (and so avoid this test altogether).
+ Node *cfg = control();
+ int depth = 0;
+ while( depth < 16 ) { // Limit search depth for speed
+ if( cfg->Opcode() == Op_IfTrue &&
+ cfg->in(0)->in(1) == tst ) {
+ // Found prior test. Use "cast_not_null" to construct an identical
+ // CastPP (and hence hash to) as already exists for the prior test.
+ // Return that casted value.
+ if (assert_null) {
+ replace_in_map(value, null());
+ return null(); // do not issue the redundant test
+ }
+ Node *oldcontrol = control();
+ set_control(cfg);
+ Node *res = cast_not_null(value);
+ set_control(oldcontrol);
+ explicit_null_checks_elided++;
+ return res;
+ }
+ cfg = IfNode::up_one_dom(cfg, /*linear_only=*/ true);
+ if (cfg == NULL) break; // Quit at region nodes
+ depth++;
+ }
+ }
+
+ //-----------
+ // Branch to failure if null
+ float ok_prob = PROB_MAX; // a priori estimate: nulls never happen
+ Deoptimization::DeoptReason reason;
+ if (assert_null)
+ reason = Deoptimization::Reason_null_assert;
+ else if (type == T_OBJECT)
+ reason = Deoptimization::Reason_null_check;
+ else
+ reason = Deoptimization::Reason_div0_check;
+
+ // To cause an implicit null check, we set the not-null probability
+ // to the maximum (PROB_MAX). For an explicit check the probability
+ // is set to a smaller value.
+ if (null_control != NULL || too_many_traps(reason)) {
+ // probability is less likely
+ ok_prob = PROB_LIKELY_MAG(3);
+ } else if (!assert_null &&
+ (ImplicitNullCheckThreshold > 0) &&
+ method() != NULL &&
+ (method()->method_data()->trap_count(reason)
+ >= (uint)ImplicitNullCheckThreshold)) {
+ ok_prob = PROB_LIKELY_MAG(3);
+ }
+
+ if (null_control != NULL) {
+ IfNode* iff = create_and_map_if(control(), tst, ok_prob, COUNT_UNKNOWN);
+ Node* null_true = _gvn.transform( new (C, 1) IfFalseNode(iff));
+ set_control( _gvn.transform( new (C, 1) IfTrueNode(iff)));
+ if (null_true == top())
+ explicit_null_checks_elided++;
+ (*null_control) = null_true;
+ } else {
+ BuildCutout unless(this, tst, ok_prob);
+ // Check for optimizer eliding test at parse time
+ if (stopped()) {
+ // Failure not possible; do not bother making uncommon trap.
+ explicit_null_checks_elided++;
+ } else if (assert_null) {
+ uncommon_trap(reason,
+ Deoptimization::Action_make_not_entrant,
+ NULL, "assert_null");
+ } else {
+ builtin_throw(reason);
+ }
+ }
+
+ // Must throw exception, fall-thru not possible?
+ if (stopped()) {
+ return top(); // No result
+ }
+
+ if (assert_null) {
+ // Cast obj to null on this path.
+ replace_in_map(value, zerocon(type));
+ return zerocon(type);
+ }
+
+ // Cast obj to not-null on this path, if there is no null_control.
+ // (If there is a null_control, a non-null value may come back to haunt us.)
+ if (type == T_OBJECT) {
+ Node* cast = cast_not_null(value, false);
+ if (null_control == NULL || (*null_control) == top())
+ replace_in_map(value, cast);
+ value = cast;
+ }
+
+ return value;
+}
+
+
+//------------------------------cast_not_null----------------------------------
+// Cast obj to not-null on this path
+Node* GraphKit::cast_not_null(Node* obj, bool do_replace_in_map) {
+ const Type *t = _gvn.type(obj);
+ const Type *t_not_null = t->join(TypePtr::NOTNULL);
+ // Object is already not-null?
+ if( t == t_not_null ) return obj;
+
+ Node *cast = new (C, 2) CastPPNode(obj,t_not_null);
+ cast->init_req(0, control());
+ cast = _gvn.transform( cast );
+
+ // Scan for instances of 'obj' in the current JVM mapping.
+ // These instances are known to be not-null after the test.
+ if (do_replace_in_map)
+ replace_in_map(obj, cast);
+
+ return cast; // Return casted value
+}
+
+
+//--------------------------replace_in_map-------------------------------------
+void GraphKit::replace_in_map(Node* old, Node* neww) {
+ this->map()->replace_edge(old, neww);
+
+ // Note: This operation potentially replaces any edge
+ // on the map. This includes locals, stack, and monitors
+ // of the current (innermost) JVM state.
+
+ // We can consider replacing in caller maps.
+ // The idea would be that an inlined function's null checks
+ // can be shared with the entire inlining tree.
+ // The expense of doing this is that the PreserveJVMState class
+ // would have to preserve caller states too, with a deep copy.
+}
+
+
+
+//=============================================================================
+//--------------------------------memory---------------------------------------
+Node* GraphKit::memory(uint alias_idx) {
+ MergeMemNode* mem = merged_memory();
+ Node* p = mem->memory_at(alias_idx);
+ _gvn.set_type(p, Type::MEMORY); // must be mapped
+ return p;
+}
+
+//-----------------------------reset_memory------------------------------------
+Node* GraphKit::reset_memory() {
+ Node* mem = map()->memory();
+ // do not use this node for any more parsing!
+ debug_only( map()->set_memory((Node*)NULL) );
+ return _gvn.transform( mem );
+}
+
+//------------------------------set_all_memory---------------------------------
+void GraphKit::set_all_memory(Node* newmem) {
+ Node* mergemem = MergeMemNode::make(C, newmem);
+ gvn().set_type_bottom(mergemem);
+ map()->set_memory(mergemem);
+}
+
+//------------------------------set_all_memory_call----------------------------
+void GraphKit::set_all_memory_call(Node* call) {
+ Node* newmem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ set_all_memory(newmem);
+}
+
+//=============================================================================
+//
+// parser factory methods for MemNodes
+//
+// These are layered on top of the factory methods in LoadNode and StoreNode,
+// and integrate with the parser's memory state and _gvn engine.
+//
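+// A typical client (cf. gen_stub above) resolves an address to an alias index
+// and lets these helpers splice the access into the current memory state.
+// Illustrative sketch only; 'adr' stands for whatever basic_plus_adr produced:
+//
+//   int alias = Compile::AliasIdxBot;   // gen_stub's "NoAlias"
+//   Node* oop = make_load(NULL, adr, TypeOopPtr::BOTTOM, T_OBJECT, alias, false);
+//   store_to_memory(NULL, adr, null(), T_ADDRESS, alias);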
+
+// factory methods in "int adr_idx"
+Node* GraphKit::make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
+ const TypePtr* adr_type = NULL; // debug-mode-only argument
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node* mem = memory(adr_idx);
+ Node* ld;
+ if (require_atomic_access && bt == T_LONG) {
+ ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+ } else {
+ ld = LoadNode::make(C, ctl, mem, adr, adr_type, t, bt);
+ }
+ return _gvn.transform(ld);
+}
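+
+// (Usage sketch, for orientation: increment_counter() below loads a raw-memory
+// int with this factory, roughly
+//    make_load(NULL, counter_addr, TypeInt::INT, T_INT, Compile::AliasIdxRaw);
+// the NULL control argument means the load is not pinned to the current
+// control edge.)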
+
+Node* GraphKit::store_to_memory(Node* ctl, Node* adr, Node *val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+ Node* st;
+ if (require_atomic_access && bt == T_LONG) {
+ st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+ } else {
+ st = StoreNode::make(C, ctl, mem, adr, adr_type, val, bt);
+ }
+ st = _gvn.transform(st);
+ set_memory(st, adr_idx);
+ // Back-to-back stores can only remove intermediate store with DU info
+ // so push on worklist for optimizer.
+ if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
+ record_for_igvn(st);
+
+ return st;
+}
+
+void GraphKit::pre_barrier(Node* ctl,
+ Node* obj,
+ Node* adr,
+ uint adr_idx,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ set_control(ctl);
+ switch (bs->kind()) {
+
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ case BarrierSet::ModRef:
+ break;
+
+ case BarrierSet::Other:
+ default :
+ ShouldNotReachHere();
+
+ }
+}
+
+void GraphKit::post_barrier(Node* ctl,
+ Node* store,
+ Node* obj,
+ Node* adr,
+ uint adr_idx,
+ Node *val,
+ BasicType bt,
+ bool use_precise) {
+ BarrierSet* bs = Universe::heap()->barrier_set();
+ set_control(ctl);
+ switch (bs->kind()) {
+
+ case BarrierSet::CardTableModRef:
+ case BarrierSet::CardTableExtension:
+ write_barrier_post(store, obj, adr, val, use_precise);
+ break;
+
+ case BarrierSet::ModRef:
+ break;
+
+ case BarrierSet::Other:
+ default :
+ ShouldNotReachHere();
+
+ }
+}
+
+Node* GraphKit::store_oop_to_object(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, false);
+ return store;
+}
+
+Node* GraphKit::store_oop_to_array(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
+ return store;
+}
+
+Node* GraphKit::store_oop_to_unknown(Node* ctl,
+ Node* obj,
+ Node* adr,
+ const TypePtr* adr_type,
+ Node *val,
+ const Type* val_type,
+ BasicType bt) {
+ uint adr_idx = C->get_alias_index(adr_type);
+ Node* store;
+ pre_barrier(ctl, obj, adr, adr_idx, val, val_type, bt);
+ store = store_to_memory(control(), adr, val, bt, adr_idx);
+ post_barrier(control(), store, obj, adr, adr_idx, val, bt, true);
+ return store;
+}
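+
+// Note: the three store_oop_to_* helpers above share the same shape,
+//   pre_barrier(); store_to_memory(); post_barrier();
+// and differ only in the 'use_precise' flag handed to post_barrier():
+// false for plain object fields, true for array elements and for stores
+// whose destination kind is unknown.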
+
+
+//-------------------------array_element_address-------------------------
+Node* GraphKit::array_element_address(Node* ary, Node* idx, BasicType elembt,
+ const TypeInt* sizetype) {
+ uint shift = exact_log2(type2aelembytes[elembt]);
+ uint header = arrayOopDesc::base_offset_in_bytes(elembt);
+
+ // short-circuit a common case (saves lots of confusing waste motion)
+ jint idx_con = find_int_con(idx, -1);
+ if (idx_con >= 0) {
+ intptr_t offset = header + ((intptr_t)idx_con << shift);
+ return basic_plus_adr(ary, offset);
+ }
+
+ // must be correct type for alignment purposes
+ Node* base = basic_plus_adr(ary, header);
+#ifdef _LP64
+ // The scaled index operand to AddP must be a clean 64-bit value.
+ // Java allows a 32-bit int to be incremented to a negative
+ // value, which appears in a 64-bit register as a large
+ // positive number. Using that large positive number as an
+ // operand in pointer arithmetic has bad consequences.
+ // On the other hand, 32-bit overflow is rare, and the possibility
+ // can often be excluded, if we annotate the ConvI2L node with
+ // a type assertion that its value is known to be a small positive
+ // number. (The prior range check has ensured this.)
+ // This assertion is used by ConvI2LNode::Ideal.
+ int index_max = max_jint - 1; // array size is max_jint, index is one less
+ if (sizetype != NULL) index_max = sizetype->_hi - 1;
+ const TypeLong* lidxtype = TypeLong::make(CONST64(0), index_max, Type::WidenMax);
+ idx = _gvn.transform( new (C, 2) ConvI2LNode(idx, lidxtype) );
+#endif
+ Node* scale = _gvn.transform( new (C, 3) LShiftXNode(idx, intcon(shift)) );
+ return basic_plus_adr(ary, base, scale);
+}
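+
+// Rough arithmetic sketch: the element address computed above is
+//   ary + header + (idx << shift)
+// e.g. for a T_INT array (shift == 2) with, say, a 12-byte header the
+// element at index i lives at ary + 12 + 4*i; the header size itself is
+// platform-dependent and comes from arrayOopDesc::base_offset_in_bytes().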
+
+//-------------------------load_array_element-------------------------
+Node* GraphKit::load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype) {
+ const Type* elemtype = arytype->elem();
+ BasicType elembt = elemtype->array_element_basic_type();
+ Node* adr = array_element_address(ary, idx, elembt, arytype->size());
+ Node* ld = make_load(ctl, adr, elemtype, elembt, arytype);
+ return ld;
+}
+
+//-------------------------set_arguments_for_java_call-------------------------
+// Arguments (pre-popped from the stack) are taken from the JVMS.
+void GraphKit::set_arguments_for_java_call(CallJavaNode* call) {
+ // Add the call arguments:
+ uint nargs = call->method()->arg_size();
+ for (uint i = 0; i < nargs; i++) {
+ Node* arg = argument(i);
+ call->init_req(i + TypeFunc::Parms, arg);
+ }
+}
+
+//---------------------------set_edges_for_java_call---------------------------
+// Connect a newly created call into the current JVMS.
+// A return value node (if any) is returned from set_edges_for_java_call.
+void GraphKit::set_edges_for_java_call(CallJavaNode* call, bool must_throw) {
+
+ // Add the predefined inputs:
+ call->init_req( TypeFunc::Control, control() );
+ call->init_req( TypeFunc::I_O , i_o() );
+ call->init_req( TypeFunc::Memory , reset_memory() );
+ call->init_req( TypeFunc::FramePtr, frameptr() );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+
+ add_safepoint_edges(call, must_throw);
+
+ Node* xcall = _gvn.transform(call);
+
+ if (xcall == top()) {
+ set_control(top());
+ return;
+ }
+ assert(xcall == call, "call identity is stable");
+
+ // Re-use the current map to produce the result.
+
+ set_control(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Control)));
+ set_i_o( _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O )));
+ set_all_memory_call(xcall);
+
+ //return xcall; // no need, caller already has it
+}
+
+Node* GraphKit::set_results_for_java_call(CallJavaNode* call) {
+ if (stopped()) return top(); // maybe the call folded up?
+
+ // Capture the return value, if any.
+ Node* ret;
+ if (call->method() == NULL ||
+ call->method()->return_type()->basic_type() == T_VOID)
+ ret = top();
+ else ret = _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+
+ // Note: Since any out-of-line call can produce an exception,
+ // we always insert an I_O projection from the call into the result.
+
+ make_slow_call_ex(call, env()->Throwable_klass(), false);
+
+ return ret;
+}
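+
+// Taken together, a Java call is typically emitted in three steps:
+// set_arguments_for_java_call() fills in the Parms edges,
+// set_edges_for_java_call() wires control/i_o/memory plus the safepoint
+// debug info, and set_results_for_java_call() captures the return value
+// and the exception state (via make_slow_call_ex).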
+
+//--------------------set_predefined_input_for_runtime_call--------------------
+// Reading and setting the memory state is way conservative here.
+// The real problem is that I am not doing real Type analysis on memory,
+// so I cannot distinguish card mark stores from other stores. Across a GC
+// point the Store Barrier and the card mark memory has to agree. I cannot
+// have a card mark store and its barrier split across the GC point from
+// either above or below. Here I get that to happen by reading ALL of memory.
+// A better answer would be to separate out card marks from other memory.
+// For now, return the input memory state, so that it can be reused
+// after the call, if this call has restricted memory effects.
+Node* GraphKit::set_predefined_input_for_runtime_call(SafePointNode* call) {
+ // Set fixed predefined input arguments
+ Node* memory = reset_memory();
+ call->init_req( TypeFunc::Control, control() );
+ call->init_req( TypeFunc::I_O, top() ); // does no i/o
+ call->init_req( TypeFunc::Memory, memory ); // may gc ptrs
+ call->init_req( TypeFunc::FramePtr, frameptr() );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+ return memory;
+}
+
+//-------------------set_predefined_output_for_runtime_call--------------------
+// Set control and memory (not i_o) from the call.
+// If keep_mem is not NULL, use it for the output state,
+// except for the RawPtr output of the call, if hook_mem is TypeRawPtr::BOTTOM.
+// If hook_mem is NULL, this call produces no memory effects at all.
+// If hook_mem is a Java-visible memory slice (such as arraycopy operands),
+// then only that memory slice is taken from the call.
+// In the last case, we must put an appropriate memory barrier before
+// the call, so as to create the correct anti-dependencies on loads
+// preceding the call.
+void GraphKit::set_predefined_output_for_runtime_call(Node* call,
+ Node* keep_mem,
+ const TypePtr* hook_mem) {
+ // no i/o
+ set_control(_gvn.transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
+ if (keep_mem) {
+ // First clone the existing memory state
+ set_all_memory(keep_mem);
+ if (hook_mem != NULL) {
+ // Make memory for the call
+ Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ // Set the RawPtr memory state only. This covers all the heap top/GC stuff
+ // We also use hook_mem to extract specific effects from arraycopy stubs.
+ set_memory(mem, hook_mem);
+ }
+ // ...else the call has NO memory effects.
+
+ // Make sure the call advertises its memory effects precisely.
+ // This lets us build accurate anti-dependences in gcm.cpp.
+ assert(C->alias_type(call->adr_type()) == C->alias_type(hook_mem),
+ "call node must be constructed correctly");
+ } else {
+ assert(hook_mem == NULL, "");
+ // This is not a "slow path" call; all memory comes from the call.
+ set_all_memory_call(call);
+ }
+}
+
+//------------------------------increment_counter------------------------------
+// for statistics: increment a VM counter by 1
+
+void GraphKit::increment_counter(address counter_addr) {
+ Node* adr1 = makecon(TypeRawPtr::make(counter_addr));
+ increment_counter(adr1);
+}
+
+void GraphKit::increment_counter(Node* counter_addr) {
+ int adr_type = Compile::AliasIdxRaw;
+ Node* cnt = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory( NULL, counter_addr, incr, T_INT, adr_type );
+}
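+
+// (The counter bump above is a plain load/add/store on the raw alias
+// category, so it is cheap but not atomic; callers pass the address of an
+// int-sized VM counter and are expected to tolerate the occasional lost
+// update, cf. the lock-statistics counter further below.)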
+
+
+//------------------------------uncommon_trap----------------------------------
+// Bail out to the interpreter in mid-method. Implemented by calling the
+// uncommon_trap blob. This helper function inserts a runtime call with the
+// right debug info.
+void GraphKit::uncommon_trap(int trap_request,
+ ciKlass* klass, const char* comment,
+ bool must_throw,
+ bool keep_exact_action) {
+ if (failing()) stop();
+ if (stopped()) return; // trap reachable?
+
+ // Note: If ProfileTraps is true, and if a deopt. actually
+ // occurs here, the runtime will make sure an MDO exists. There is
+ // no need to call method()->build_method_data() at this point.
+
+#ifdef ASSERT
+ if (!must_throw) {
+ // Make sure the stack has at least enough depth to execute
+ // the current bytecode.
+ int inputs, ignore;
+ if (compute_stack_effects(inputs, ignore)) {
+ assert(sp() >= inputs, "must have enough JVMS stack to execute");
+ // It is a frequent error in library_call.cpp to issue an
+ // uncommon trap with the _sp value already popped.
+ }
+ }
+#endif
+
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(trap_request);
+ Deoptimization::DeoptAction action = Deoptimization::trap_request_action(trap_request);
+
+ switch (action) {
+ case Deoptimization::Action_maybe_recompile:
+ case Deoptimization::Action_reinterpret:
+ // Temporary fix for 6529811 to allow virtual calls to be sure they
+ // get the chance to go from mono->bi->mega
+ if (!keep_exact_action &&
+ Deoptimization::trap_request_index(trap_request) < 0 &&
+ too_many_recompiles(reason)) {
+ // This BCI is causing too many recompilations.
+ action = Deoptimization::Action_none;
+ trap_request = Deoptimization::make_trap_request(reason, action);
+ } else {
+ C->set_trap_can_recompile(true);
+ }
+ break;
+ case Deoptimization::Action_make_not_entrant:
+ C->set_trap_can_recompile(true);
+ break;
+#ifdef ASSERT
+ case Deoptimization::Action_none:
+ case Deoptimization::Action_make_not_compilable:
+ break;
+ default:
+ assert(false, "bad action");
+#endif
+ }
+
+ if (TraceOptoParse) {
+ char buf[100];
+ tty->print_cr("Uncommon trap %s at bci:%d",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_request), bci());
+ }
+
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ int kid = (klass == NULL)? -1: log->identify(klass);
+ log->begin_elem("uncommon_trap bci='%d'", bci());
+ char buf[100];
+ log->print(" %s", Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_request));
+ if (kid >= 0) log->print(" klass='%d'", kid);
+ if (comment != NULL) log->print(" comment='%s'", comment);
+ log->end_elem();
+ }
+
+ // Make sure any guarding test views this path as very unlikely
+ Node *i0 = control()->in(0);
+ if (i0 != NULL && i0->is_If()) { // Found a guarding if test?
+ IfNode *iff = i0->as_If();
+ float f = iff->_prob; // Get prob
+ if (control()->Opcode() == Op_IfTrue) {
+ if (f > PROB_UNLIKELY_MAG(4))
+ iff->_prob = PROB_MIN;
+ } else {
+ if (f < PROB_LIKELY_MAG(4))
+ iff->_prob = PROB_MAX;
+ }
+ }
+
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+
+ // Now insert the uncommon trap subroutine call
+ address call_addr = SharedRuntime::uncommon_trap_blob()->instructions_begin();
+ const TypePtr* no_memory_effects = NULL;
+ // Pass the index of the class to be loaded
+ Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON |
+ (must_throw ? RC_MUST_THROW : 0),
+ OptoRuntime::uncommon_trap_Type(),
+ call_addr, "uncommon_trap", no_memory_effects,
+ intcon(trap_request));
+ assert(call->as_CallStaticJava()->uncommon_trap_request() == trap_request,
+ "must extract request correctly from the graph");
+ assert(trap_request != 0, "zero value reserved by uncommon_trap_request");
+
+ call->set_req(TypeFunc::ReturnAdr, returnadr());
+ // The debug info is the only real input to this call.
+
+ // Halt-and-catch fire here. The above call should never return!
+ HaltNode* halt = new(C, TypeFunc::Parms) HaltNode(control(), frameptr());
+ _gvn.set_type_bottom(halt);
+ root()->add_req(halt);
+
+ stop_and_kill_map();
+}
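+
+// Note on trap_request: as the Deoptimization::trap_request_reason/
+// trap_request_action/trap_request_index accessors above suggest, a
+// trap_request packs the deopt reason, the requested action, and an
+// optional (class) index into a single int, which is then handed to the
+// uncommon_trap blob as its lone integer argument.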
+
+
+//--------------------------just_allocated_object------------------------------
+// Report the object that was just allocated.
+// It must be the case that there are no intervening safepoints.
+// We use this to determine if an object is so "fresh" that
+// it does not require card marks.
+Node* GraphKit::just_allocated_object(Node* current_control) {
+ if (C->recent_alloc_ctl() == current_control)
+ return C->recent_alloc_obj();
+ return NULL;
+}
+
+
+//---------------------------write_barrier_post--------------------------------
+// Insert a write-barrier store. This is to let generational GC work; we have
+// to flag all oop-stores before the next GC point.
+void GraphKit::write_barrier_post(Node* oop_store, Node* obj, Node* adr,
+ Node* val, bool use_precise) {
+ // No store check needed if we're storing a NULL or an old object
+ // (latter case is probably a string constant). The concurrent
+ // mark sweep garbage collector, however, needs to have all nonNull
+ // oop updates flagged via card-marks.
+ if (val != NULL && val->is_Con()) {
+ // must be either an oop or NULL
+ const Type* t = val->bottom_type();
+ if (t == TypePtr::NULL_PTR || t == Type::TOP)
+ // stores of null never (?) need barriers
+ return;
+ ciObject* con = t->is_oopptr()->const_oop();
+ if (con != NULL
+ && con->is_perm()
+ && Universe::heap()->can_elide_permanent_oop_store_barriers())
+ // no store barrier needed, because no old-to-new ref created
+ return;
+ }
+
+ if (use_ReduceInitialCardMarks()
+ && obj == just_allocated_object(control())) {
+ // We can skip marks on a freshly-allocated object.
+ // Keep this code in sync with do_eager_card_mark in runtime.cpp.
+ // That routine eagerly marks the occasional object which is produced
+ // by the slow path, so that we don't have to do it here.
+ return;
+ }
+
+ if (!use_precise) {
+ // All card marks for a (non-array) instance are in one place:
+ adr = obj;
+ }
+ // (Else it's an array (or unknown), and we want more precise card marks.)
+ assert(adr != NULL, "");
+
+ // Get the alias_index for raw card-mark memory
+ int adr_type = Compile::AliasIdxRaw;
+ // Convert the pointer to an int prior to doing math on it
+ Node* cast = _gvn.transform(new (C, 2) CastP2XNode(control(), adr));
+ // Divide by card size
+ assert(Universe::heap()->barrier_set()->kind() == BarrierSet::CardTableModRef,
+ "Only one we handle so far.");
+ CardTableModRefBS* ct =
+ (CardTableModRefBS*)(Universe::heap()->barrier_set());
+ Node *b = _gvn.transform(new (C, 3) URShiftXNode( cast, _gvn.intcon(CardTableModRefBS::card_shift) ));
+ // We store into a byte array, so do not bother to left-shift by zero
+ // Get base of card map
+ assert(sizeof(*ct->byte_map_base) == sizeof(jbyte),
+ "adjust this code");
+ Node *c = makecon(TypeRawPtr::make((address)ct->byte_map_base));
+ // Combine
+ Node *sb_ctl = control();
+ Node *sb_adr = _gvn.transform(new (C, 4) AddPNode( top()/*no base ptr*/, c, b ));
+ Node *sb_val = _gvn.intcon(0);
+ // Smash zero into card
+ if( !UseConcMarkSweepGC ) {
+ BasicType bt = T_BYTE;
+ store_to_memory(sb_ctl, sb_adr, sb_val, bt, adr_type);
+ } else {
+ // Specialized path for CM store barrier
+ cms_card_mark( sb_ctl, sb_adr, sb_val, oop_store);
+ }
+}
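+
+// In effect the code above computes
+//   card_addr = byte_map_base + (adr >> card_shift)
+// and smashes a zero byte there (or emits a StoreCM for CMS), i.e. one
+// byte of card table covers 2^card_shift bytes of heap.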
+
+// Specialized path for CMS store barrier
+void GraphKit::cms_card_mark(Node* ctl, Node* adr, Node* val, Node *oop_store) {
+ BasicType bt = T_BYTE;
+ int adr_idx = Compile::AliasIdxRaw;
+ Node* mem = memory(adr_idx);
+
+ // The type input is NULL in PRODUCT builds
+ const TypePtr* type = NULL;
+ debug_only(type = C->get_adr_type(adr_idx));
+
+ // Add required edge to oop_store, optimizer does not support precedence edges.
+ // Convert required edge to precedence edge before allocation.
+ Node *store = _gvn.transform( new (C, 5) StoreCMNode(ctl, mem, adr, type, val, oop_store) );
+ set_memory(store, adr_idx);
+
+ // For CMS, back-to-back card-marks can only remove the first one
+ // and this requires DU info. Push on worklist for optimizer.
+ if (mem->req() > MemNode::Address && adr == mem->in(MemNode::Address))
+ record_for_igvn(store);
+}
+
+
+void GraphKit::round_double_arguments(ciMethod* dest_method) {
+ // (Note: TypeFunc::make has a cache that makes this fast.)
+ const TypeFunc* tf = TypeFunc::make(dest_method);
+ int nargs = tf->_domain->_cnt - TypeFunc::Parms;
+ for (int j = 0; j < nargs; j++) {
+ const Type *targ = tf->_domain->field_at(j + TypeFunc::Parms);
+ if( targ->basic_type() == T_DOUBLE ) {
+ // If any parameters are doubles, they must be rounded before
+ // the call, dstore_rounding does gvn.transform
+ Node *arg = argument(j);
+ arg = dstore_rounding(arg);
+ set_argument(j, arg);
+ }
+ }
+}
+
+void GraphKit::round_double_result(ciMethod* dest_method) {
+ // A non-strict method may return a double value which has an extended
+ // exponent, but this must not be visible in a caller which is 'strict'.
+ // If a strict caller invokes a non-strict callee, round the double result.
+
+ BasicType result_type = dest_method->return_type()->basic_type();
+ assert( method() != NULL, "must have caller context");
+ if( result_type == T_DOUBLE && method()->is_strict() && !dest_method->is_strict() ) {
+ // Destination method's return value is on top of stack
+ // dstore_rounding() does gvn.transform
+ Node *result = pop_pair();
+ result = dstore_rounding(result);
+ push_pair(result);
+ }
+}
+
+// rounding for strict float precision conformance
+Node* GraphKit::precision_rounding(Node* n) {
+ return UseStrictFP && _method->flags().is_strict()
+ && UseSSE == 0 && Matcher::strict_fp_requires_explicit_rounding
+ ? _gvn.transform( new (C, 2) RoundFloatNode(0, n) )
+ : n;
+}
+
+// rounding for strict double precision conformance
+Node* GraphKit::dprecision_rounding(Node *n) {
+ return UseStrictFP && _method->flags().is_strict()
+ && UseSSE <= 1 && Matcher::strict_fp_requires_explicit_rounding
+ ? _gvn.transform( new (C, 2) RoundDoubleNode(0, n) )
+ : n;
+}
+
+// rounding for non-strict double stores
+Node* GraphKit::dstore_rounding(Node* n) {
+ return Matcher::strict_fp_requires_explicit_rounding
+ && UseSSE <= 1
+ ? _gvn.transform( new (C, 2) RoundDoubleNode(0, n) )
+ : n;
+}
+
+//=============================================================================
+// Generate a fast path/slow path idiom. Graph looks like:
+// [foo] indicates that 'foo' is a parameter
+//
+// [in] NULL
+// \ /
+// CmpP
+// Bool ne
+// If
+// / \
+// True False-<2>
+// / |
+// / cast_not_null
+// Load | | ^
+// [fast_test] | |
+// gvn to opt_test | |
+// / \ | <1>
+// True False |
+// | \\ |
+// [slow_call] \[fast_result]
+// Ctl Val \ \
+// | \ \
+// Catch <1> \ \
+// / \ ^ \ \
+// Ex No_Ex | \ \
+// | \ \ | \ <2> \
+// ... \ [slow_res] | | \ [null_result]
+// \ \--+--+--- | |
+// \ | / \ | /
+// --------Region Phi
+//
+//=============================================================================
+// Code is structured as a series of driver functions all called 'do_XXX' that
+// call a set of helper functions. Helper functions first, then drivers.
+
+//------------------------------null_check_oop---------------------------------
+// Null check oop. Set null-path control into Region in slot 3.
+// Give the cast-not-null result the other (not-null) control. Return the cast.
+Node* GraphKit::null_check_oop(Node* value, Node* *null_control,
+ bool never_see_null) {
+ // Initial NULL check taken path
+ (*null_control) = top();
+ Node* cast = null_check_common(value, T_OBJECT, false, null_control);
+
+ // Generate uncommon_trap:
+ if (never_see_null && (*null_control) != top()) {
+ // If we see an unexpected null at a check-cast we record it and force a
+ // recompile; the offending check-cast will be compiled to handle NULLs.
+ // If we see more than one offending BCI, then all checkcasts in the
+ // method will be compiled to handle NULLs.
+ PreserveJVMState pjvms(this);
+ set_control(*null_control);
+ uncommon_trap(Deoptimization::Reason_null_check,
+ Deoptimization::Action_make_not_entrant);
+ (*null_control) = top(); // NULL path is dead
+ }
+
+ // Cast away null-ness on the result
+ return cast;
+}
+
+//------------------------------opt_iff----------------------------------------
+// Optimize the fast-check IfNode. Set the fast-path region slot 2.
+// Return slow-path control.
+Node* GraphKit::opt_iff(Node* region, Node* iff) {
+ IfNode *opt_iff = _gvn.transform(iff)->as_If();
+
+ // Fast path taken; set region slot 2
+ Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_iff) );
+ region->init_req(2,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *slow_taken = _gvn.transform( new (C, 1) IfTrueNode(opt_iff) );
+ return slow_taken;
+}
+
+//-----------------------------make_runtime_call-------------------------------
+Node* GraphKit::make_runtime_call(int flags,
+ const TypeFunc* call_type, address call_addr,
+ const char* call_name,
+ const TypePtr* adr_type,
+ // The following parms are all optional.
+ // The first NULL ends the list.
+ Node* parm0, Node* parm1,
+ Node* parm2, Node* parm3,
+ Node* parm4, Node* parm5,
+ Node* parm6, Node* parm7) {
+ // Slow-path call
+ int size = call_type->domain()->cnt();
+ bool is_leaf = !(flags & RC_NO_LEAF);
+ bool has_io = (!is_leaf && !(flags & RC_NO_IO));
+ if (call_name == NULL) {
+ assert(!is_leaf, "must supply name for leaf");
+ call_name = OptoRuntime::stub_name(call_addr);
+ }
+ CallNode* call;
+ if (!is_leaf) {
+ call = new(C, size) CallStaticJavaNode(call_type, call_addr, call_name,
+ bci(), adr_type);
+ } else if (flags & RC_NO_FP) {
+ call = new(C, size) CallLeafNoFPNode(call_type, call_addr, call_name, adr_type);
+ } else {
+ call = new(C, size) CallLeafNode(call_type, call_addr, call_name, adr_type);
+ }
+
+ // The following is similar to set_edges_for_java_call,
+ // except that the memory effects of the call are restricted to AliasIdxRaw.
+
+ // Slow path call has no side-effects, uses few values
+ bool wide_in = !(flags & RC_NARROW_MEM);
+ bool wide_out = (C->get_alias_index(adr_type) == Compile::AliasIdxBot);
+
+ Node* prev_mem = NULL;
+ if (wide_in) {
+ prev_mem = set_predefined_input_for_runtime_call(call);
+ } else {
+ assert(!wide_out, "narrow in => narrow out");
+ Node* narrow_mem = memory(adr_type);
+ prev_mem = reset_memory();
+ map()->set_memory(narrow_mem);
+ set_predefined_input_for_runtime_call(call);
+ }
+
+ // Hook each parm in order. Stop looking at the first NULL.
+ if (parm0 != NULL) { call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) { call->init_req(TypeFunc::Parms+1, parm1);
+ if (parm2 != NULL) { call->init_req(TypeFunc::Parms+2, parm2);
+ if (parm3 != NULL) { call->init_req(TypeFunc::Parms+3, parm3);
+ if (parm4 != NULL) { call->init_req(TypeFunc::Parms+4, parm4);
+ if (parm5 != NULL) { call->init_req(TypeFunc::Parms+5, parm5);
+ if (parm6 != NULL) { call->init_req(TypeFunc::Parms+6, parm6);
+ if (parm7 != NULL) { call->init_req(TypeFunc::Parms+7, parm7);
+ /* close each nested if ===> */ } } } } } } } }
+ assert(call->in(call->req()-1) != NULL, "must initialize all parms");
+
+ if (!is_leaf) {
+ // Non-leaves can block and take safepoints:
+ add_safepoint_edges(call, ((flags & RC_MUST_THROW) != 0));
+ }
+ // Non-leaves can throw exceptions:
+ if (has_io) {
+ call->set_req(TypeFunc::I_O, i_o());
+ }
+
+ if (flags & RC_UNCOMMON) {
+ // Set the count to a tiny probability. Cf. Estimate_Block_Frequency.
+ // (An "if" probability corresponds roughly to an unconditional count.
+ // Sort of.)
+ call->set_cnt(PROB_UNLIKELY_MAG(4));
+ }
+
+ Node* c = _gvn.transform(call);
+ assert(c == call, "cannot disappear");
+
+ if (wide_out) {
+ // Slow path call has full side-effects.
+ set_predefined_output_for_runtime_call(call);
+ } else {
+ // Slow path call has few side-effects, and/or sets few values.
+ set_predefined_output_for_runtime_call(call, prev_mem, adr_type);
+ }
+
+ if (has_io) {
+ set_i_o(_gvn.transform(new (C, 1) ProjNode(call, TypeFunc::I_O)));
+ }
+ return call;
+
+}
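+
+// Usage sketch: uncommon_trap() above is one client, roughly
+//   make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
+//                     OptoRuntime::uncommon_trap_Type(), call_addr,
+//                     "uncommon_trap", NULL /*no_memory_effects*/,
+//                     intcon(trap_request));
+// Leaf calls omit RC_NO_LEAF and then carry no JVM state or I/O edges.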
+
+//------------------------------merge_memory-----------------------------------
+// Merge memory from one path into the current memory state.
+void GraphKit::merge_memory(Node* new_mem, Node* region, int new_path) {
+ for (MergeMemStream mms(merged_memory(), new_mem->as_MergeMem()); mms.next_non_empty2(); ) {
+ Node* old_slice = mms.force_memory();
+ Node* new_slice = mms.memory2();
+ if (old_slice != new_slice) {
+ PhiNode* phi;
+ if (new_slice->is_Phi() && new_slice->as_Phi()->region() == region) {
+ phi = new_slice->as_Phi();
+ #ifdef ASSERT
+ if (old_slice->is_Phi() && old_slice->as_Phi()->region() == region)
+ old_slice = old_slice->in(new_path);
+ // Caller is responsible for ensuring that any pre-existing
+ // phis are already aware of old memory.
+ int old_path = (new_path > 1) ? 1 : 2; // choose old_path != new_path
+ assert(phi->in(old_path) == old_slice, "pre-existing phis OK");
+ #endif
+ mms.set_memory(phi);
+ } else {
+ phi = PhiNode::make(region, old_slice, Type::MEMORY, mms.adr_type(C));
+ _gvn.set_type(phi, Type::MEMORY);
+ phi->set_req(new_path, new_slice);
+ mms.set_memory(_gvn.transform(phi)); // assume it is complete
+ }
+ }
+ }
+}
+
+//------------------------------make_slow_call_ex------------------------------
+// Make the exception handler hookups for the slow call
+void GraphKit::make_slow_call_ex(Node* call, ciInstanceKlass* ex_klass, bool separate_io_proj) {
+ if (stopped()) return;
+
+ // Make a catch node with just two handlers: fall-through and catch-all
+ Node* i_o = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::I_O, separate_io_proj) );
+ Node* catc = _gvn.transform( new (C, 2) CatchNode(control(), i_o, 2) );
+ Node* norm = _gvn.transform( new (C, 1) CatchProjNode(catc, CatchProjNode::fall_through_index, CatchProjNode::no_handler_bci) );
+ Node* excp = _gvn.transform( new (C, 1) CatchProjNode(catc, CatchProjNode::catch_all_index, CatchProjNode::no_handler_bci) );
+
+ { PreserveJVMState pjvms(this);
+ set_control(excp);
+ set_i_o(i_o);
+
+ if (excp != top()) {
+ // Create an exception state also.
+ // Use an exact type if the caller has specified a specific exception.
+ const Type* ex_type = TypeOopPtr::make_from_klass_unique(ex_klass)->cast_to_ptr_type(TypePtr::NotNull);
+ Node* ex_oop = new (C, 2) CreateExNode(ex_type, control(), i_o);
+ add_exception_state(make_exception_state(_gvn.transform(ex_oop)));
+ }
+ }
+
+ // Get the no-exception control from the CatchNode.
+ set_control(norm);
+}
+
+
+//-------------------------------gen_subtype_check-----------------------------
+// Generate a subtyping check. Takes as input the subtype and supertype.
+// Returns 2 values: sets the default control() to the true path and returns
+// the false path. Only reads invariant memory; sets no (visible) memory.
+// The PartialSubtypeCheckNode sets the hidden 1-word cache in the encoding
+// but that's not exposed to the optimizer. This call also doesn't take in an
+// Object; if you wish to check an Object you need to load the Object's class
+// prior to coming here.
+Node* GraphKit::gen_subtype_check(Node* subklass, Node* superklass) {
+ // Fast check for identical types, perhaps identical constants.
+ // The types can even be identical non-constants, in cases
+ // involving Array.newInstance, Object.clone, etc.
+ if (subklass == superklass)
+ return top(); // false path is dead; no test needed.
+
+ if (_gvn.type(superklass)->singleton()) {
+ ciKlass* superk = _gvn.type(superklass)->is_klassptr()->klass();
+ ciKlass* subk = _gvn.type(subklass)->is_klassptr()->klass();
+
+ // In the common case of an exact superklass, try to fold up the
+ // test before generating code. You may ask, why not just generate
+ // the code and then let it fold up? The answer is that the generated
+ // code will necessarily include null checks, which do not always
+ // completely fold away. If they are also needless, then they turn
+ // into a performance loss. Example:
+ // Foo[] fa = blah(); Foo x = fa[0]; fa[1] = x;
+ // Here, the type of 'fa' is often exact, so the store check
+ // of fa[1]=x will fold up, without testing the nullness of x.
+ switch (static_subtype_check(superk, subk)) {
+ case SSC_always_false:
+ {
+ Node* always_fail = control();
+ set_control(top());
+ return always_fail;
+ }
+ case SSC_always_true:
+ return top();
+ case SSC_easy_test:
+ {
+ // Just do a direct pointer compare and be done.
+ Node* cmp = _gvn.transform( new(C, 3) CmpPNode(subklass, superklass) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_xform_if(control(), bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ set_control( _gvn.transform( new(C, 1) IfTrueNode (iff) ) );
+ return _gvn.transform( new(C, 1) IfFalseNode(iff) );
+ }
+ case SSC_full_test:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ }
+
+ // %%% Possible further optimization: Even if the superklass is not exact,
+ // if the subklass is the unique subtype of the superklass, the check
+ // will always succeed. We could leave a dependency behind to ensure this.
+
+ // First load the super-klass's check-offset
+ Node *p1 = basic_plus_adr( superklass, superklass, sizeof(oopDesc) + Klass::super_check_offset_offset_in_bytes() );
+ Node *chk_off = _gvn.transform( new (C, 3) LoadINode( NULL, memory(p1), p1, _gvn.type(p1)->is_ptr() ) );
+ int cacheoff_con = sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes();
+ bool might_be_cache = (find_int_con(chk_off, cacheoff_con) == cacheoff_con);
+
+ // Load from the sub-klass's super-class display list, or a 1-word cache of
+ // the secondary superclass list, or a failing value with a sentinel offset
+ // if the super-klass is an interface or exceptionally deep in the Java
+ // hierarchy and we have to scan the secondary superclass list the hard way.
+ // Worst-case type is a little odd: NULL is allowed as a result (usually
+ // klass loads can never produce a NULL).
+ Node *chk_off_X = ConvI2X(chk_off);
+ Node *p2 = _gvn.transform( new (C, 4) AddPNode(subklass,subklass,chk_off_X) );
+ // For some types like interfaces the following loadKlass is from a 1-word
+ // cache which is mutable so can't use immutable memory. Other
+ // types load from the super-class display table which is immutable.
+ Node *kmem = might_be_cache ? memory(p2) : immutable_memory();
+ Node *nkls = _gvn.transform( new (C, 3) LoadKlassNode( NULL, kmem, p2, _gvn.type(p2)->is_ptr(), TypeKlassPtr::OBJECT_OR_NULL ) );
+
+ // Compile speed common case: ARE a subtype and we canNOT fail
+ if( superklass == nkls )
+ return top(); // false path is dead; no test needed.
+
+ // See if we get an immediate positive hit. Happens roughly 83% of the
+ // time. Test to see if the value loaded just previously from the subklass
+ // is exactly the superklass.
+ Node *cmp1 = _gvn.transform( new (C, 3) CmpPNode( superklass, nkls ) );
+ Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp1, BoolTest::eq ) );
+ IfNode *iff1 = create_and_xform_if( control(), bol1, PROB_LIKELY(0.83f), COUNT_UNKNOWN );
+ Node *iftrue1 = _gvn.transform( new (C, 1) IfTrueNode ( iff1 ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode( iff1 ) ) );
+
+ // Compile speed common case: Check for being deterministic right now. If
+ // chk_off is a constant and not equal to cacheoff then we are NOT a
+ // subklass. In this case we need exactly the 1 test above and we can
+ // return those results immediately.
+ if (!might_be_cache) {
+ Node* not_subtype_ctrl = control();
+ set_control(iftrue1); // We need exactly the 1 test above
+ return not_subtype_ctrl;
+ }
+
+ // Gather the various success & failures here
+ RegionNode *r_ok_subtype = new (C, 4) RegionNode(4);
+ record_for_igvn(r_ok_subtype);
+ RegionNode *r_not_subtype = new (C, 3) RegionNode(3);
+ record_for_igvn(r_not_subtype);
+
+ r_ok_subtype->init_req(1, iftrue1);
+
+ // Check for immediate negative hit. Happens roughly 11% of the time (which
+ // is roughly 63% of the remaining cases). Test to see if the loaded
+ // check-offset points into the subklass display list or the 1-element
+ // cache. If it points to the display (and NOT the cache) and the display
+ // missed then it's not a subtype.
+ Node *cacheoff = _gvn.intcon(cacheoff_con);
+ Node *cmp2 = _gvn.transform( new (C, 3) CmpINode( chk_off, cacheoff ) );
+ Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmp2, BoolTest::ne ) );
+ IfNode *iff2 = create_and_xform_if( control(), bol2, PROB_LIKELY(0.63f), COUNT_UNKNOWN );
+ r_not_subtype->init_req(1, _gvn.transform( new (C, 1) IfTrueNode (iff2) ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode(iff2) ) );
+
+ // Check for self. Very rare to get here, but it's taken 1/3 the time.
+ // No performance impact (too rare) but allows sharing of secondary arrays
+ // which has some footprint reduction.
+ Node *cmp3 = _gvn.transform( new (C, 3) CmpPNode( subklass, superklass ) );
+ Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmp3, BoolTest::eq ) );
+ IfNode *iff3 = create_and_xform_if( control(), bol3, PROB_LIKELY(0.36f), COUNT_UNKNOWN );
+ r_ok_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode ( iff3 ) ) );
+ set_control( _gvn.transform( new (C, 1) IfFalseNode( iff3 ) ) );
+
+ // Now do a linear scan of the secondary super-klass array. Again, no real
+ // performance impact (too rare) but it's gotta be done.
+ // (The stub also contains the self-check of subklass == superklass.
+ // Since the code is rarely used, there is no penalty for moving it
+ // out of line, and it can only improve I-cache density.)
+ Node* psc = _gvn.transform(
+ new (C, 3) PartialSubtypeCheckNode(control(), subklass, superklass) );
+
+ Node *cmp4 = _gvn.transform( new (C, 3) CmpPNode( psc, null() ) );
+ Node *bol4 = _gvn.transform( new (C, 2) BoolNode( cmp4, BoolTest::ne ) );
+ IfNode *iff4 = create_and_xform_if( control(), bol4, PROB_FAIR, COUNT_UNKNOWN );
+ r_not_subtype->init_req(2, _gvn.transform( new (C, 1) IfTrueNode (iff4) ) );
+ r_ok_subtype ->init_req(3, _gvn.transform( new (C, 1) IfFalseNode(iff4) ) );
+
+ // Return false path; set default control to true path.
+ set_control( _gvn.transform(r_ok_subtype) );
+ return _gvn.transform(r_not_subtype);
+}
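+
+// Summary of the dynamic check ladder built above:
+//  1) load the word at subklass+chk_off and compare it with superklass
+//     (primary-super display hit, ~83% of the time);
+//  2) if chk_off is not the secondary-super cache offset, a display miss
+//     is a definite failure;
+//  3) subklass == superklass self check;
+//  4) otherwise fall back to the PartialSubtypeCheck stub, which scans
+//     the secondary-super array out of line.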
+
+//----------------------------static_subtype_check-----------------------------
+// Shortcut important common cases when superklass is exact:
+// (0) superklass is java.lang.Object (can occur in reflective code)
+// (1) subklass is already limited to a subtype of superklass => always ok
+// (2) subklass does not overlap with superklass => always fail
+// (3) superklass has NO subtypes and we can check with a simple compare.
+int GraphKit::static_subtype_check(ciKlass* superk, ciKlass* subk) {
+ if (StressReflectiveCode) {
+ return SSC_full_test; // Let caller generate the general case.
+ }
+
+ if (superk == env()->Object_klass()) {
+ return SSC_always_true; // (0) this test cannot fail
+ }
+
+ ciType* superelem = superk;
+ if (superelem->is_array_klass())
+ superelem = superelem->as_array_klass()->base_element_type();
+
+ if (!subk->is_interface()) { // cannot trust static interface types yet
+ if (subk->is_subtype_of(superk)) {
+ return SSC_always_true; // (1) false path dead; no dynamic test needed
+ }
+ if (!(superelem->is_klass() && superelem->as_klass()->is_interface()) &&
+ !superk->is_subtype_of(subk)) {
+ return SSC_always_false;
+ }
+ }
+
+ // If casting to an instance klass, it must have no subtypes
+ if (superk->is_interface()) {
+ // Cannot trust interfaces yet.
+ // %%% S.B. superk->nof_implementors() == 1
+ } else if (superelem->is_instance_klass()) {
+ ciInstanceKlass* ik = superelem->as_instance_klass();
+ if (!ik->has_subklass() && !ik->is_interface()) {
+ if (!ik->is_final()) {
+ // Add a dependency if there is a chance of a later subclass.
+ C->dependencies()->assert_leaf_type(ik);
+ }
+ return SSC_easy_test; // (3) caller can do a simple ptr comparison
+ }
+ } else {
+ // A primitive array type has no subtypes.
+ return SSC_easy_test; // (3) caller can do a simple ptr comparison
+ }
+
+ return SSC_full_test;
+}
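+
+// Concrete examples of the shortcuts above: superk == java.lang.Object
+// folds to SSC_always_true; a final or currently-leaf instance klass (and
+// any primitive array klass) yields SSC_easy_test, i.e. a single pointer
+// compare; interfaces used as the superklass, and instance klasses that
+// already have subclasses, fall back to SSC_full_test.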
+
+// Profile-driven exact type check:
+Node* GraphKit::type_check_receiver(Node* receiver, ciKlass* klass,
+ float prob,
+ Node* *casted_receiver) {
+ const TypeKlassPtr* tklass = TypeKlassPtr::make(klass);
+ Node* recv_klass = load_object_klass(receiver);
+ Node* want_klass = makecon(tklass);
+ Node* cmp = _gvn.transform( new(C, 3) CmpPNode(recv_klass, want_klass) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_xform_if(control(), bol, prob, COUNT_UNKNOWN);
+ set_control( _gvn.transform( new(C, 1) IfTrueNode (iff) ));
+ Node* fail = _gvn.transform( new(C, 1) IfFalseNode(iff) );
+
+ const TypeOopPtr* recv_xtype = tklass->as_instance_type();
+ assert(recv_xtype->klass_is_exact(), "");
+
+ // Subsume downstream occurrences of receiver with a cast to
+ // recv_xtype, since now we know what the type will be.
+ Node* cast = new(C, 2) CheckCastPPNode(control(), receiver, recv_xtype);
+ (*casted_receiver) = _gvn.transform(cast);
+ // (User must make the replace_in_map call.)
+
+ return fail;
+}
+
+
+//-------------------------------gen_instanceof--------------------------------
+// Generate an instance-of idiom. Used by both the instance-of bytecode
+// and the reflective instance-of call.
+Node* GraphKit::gen_instanceof( Node *subobj, Node* superklass ) {
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+ assert( !stopped(), "dead parse path should be checked in callers" );
+ assert(!TypePtr::NULL_PTR->higher_equal(_gvn.type(superklass)->is_klassptr()),
+ "must check for not-null not-dead klass in callers");
+
+ // Make the merge point
+ enum { _obj_path = 1, _fail_path, _null_path, PATH_LIMIT };
+ RegionNode* region = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new(C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ // Null check; get casted pointer; set region slot 3
+ Node* null_ctl = top();
+ Node* not_null_obj = null_check_oop(subobj, &null_ctl);
+
+ // If not_null_obj is dead, only null-path is taken
+ if (stopped()) { // Doing instance-of on a NULL?
+ set_control(null_ctl);
+ return intcon(0);
+ }
+ region->init_req(_null_path, null_ctl);
+ phi ->init_req(_null_path, intcon(0)); // Set null path value
+
+ // Load the object's klass
+ Node* obj_klass = load_object_klass(not_null_obj);
+
+ // Generate the subtype check
+ Node* not_subtype_ctrl = gen_subtype_check(obj_klass, superklass);
+
+ // Plug in the success path to the general merge in slot 1.
+ region->init_req(_obj_path, control());
+ phi ->init_req(_obj_path, intcon(1));
+
+ // Plug in the failing path to the general merge in slot 2.
+ region->init_req(_fail_path, not_subtype_ctrl);
+ phi ->init_req(_fail_path, intcon(0));
+
+ // Return final merged results
+ set_control( _gvn.transform(region) );
+ record_for_igvn(region);
+ return _gvn.transform(phi);
+}
+
+//-------------------------------gen_checkcast---------------------------------
+// Generate a checkcast idiom. Used by both the checkcast bytecode and the
+// array store bytecode. Stack must be as-if BEFORE doing the bytecode so the
+// uncommon-trap paths work. Adjust stack after this call.
+// If failure_control is supplied and not null, it is filled in with
+// the control edge for the cast failure. Otherwise, an appropriate
+// uncommon trap or exception is thrown.
+Node* GraphKit::gen_checkcast(Node *obj, Node* superklass,
+ Node* *failure_control) {
+ kill_dead_locals(); // Benefit all the uncommon traps
+ const TypeKlassPtr *tk = _gvn.type(superklass)->is_klassptr();
+ const Type *toop = TypeOopPtr::make_from_klass(tk->klass());
+
+ // Fast cutout: Check the case that the cast is vacuously true.
+ // This detects the common cases where the test will short-circuit
+ // away completely. We do this before we perform the null check,
+ // because if the test is going to turn into zero code, we don't
+ // want a residual null check left around. (Causes a slowdown,
+ // for example, in some objArray manipulations, such as a[i]=a[j].)
+ if (tk->singleton()) {
+ const TypeOopPtr* objtp = _gvn.type(obj)->isa_oopptr();
+ if (objtp != NULL && objtp->klass() != NULL) {
+ switch (static_subtype_check(tk->klass(), objtp->klass())) {
+ case SSC_always_true:
+ return obj;
+ case SSC_always_false:
+ // It needs a null check because a null will *pass* the cast check.
+ // A non-null value will always produce an exception.
+ return do_null_assert(obj, T_OBJECT);
+ }
+ }
+ }
+
+ ciProfileData* data = NULL;
+ if (failure_control == NULL) { // use MDO in regular case only
+ assert(java_bc() == Bytecodes::_aastore ||
+ java_bc() == Bytecodes::_checkcast,
+ "interpreter profiles type checks only for these BCs");
+ data = method()->method_data()->bci_to_data(bci());
+ }
+
+ // Make the merge point
+ enum { _obj_path = 1, _null_path, PATH_LIMIT };
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new (C, PATH_LIMIT) PhiNode(region, toop);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ // Use null-cast information if it is available
+ bool never_see_null = false;
+ // If we see an unexpected null at a check-cast we record it and force a
+ // recompile; the offending check-cast will be compiled to handle NULLs.
+ // If we see several offending BCIs, then all checkcasts in the
+ // method will be compiled to handle NULLs.
+ if (UncommonNullCast // Cutout for this technique
+ && failure_control == NULL // regular case
+ && obj != null() // And not the -Xcomp stupid case?
+ && !too_many_traps(Deoptimization::Reason_null_check)) {
+ // Finally, check the "null_seen" bit from the interpreter.
+ if (data == NULL || !data->as_BitData()->null_seen()) {
+ never_see_null = true;
+ }
+ }
+
+ // Null check; get casted pointer; set region slot 2
+ Node* null_ctl = top();
+ Node* not_null_obj = null_check_oop(obj, &null_ctl, never_see_null);
+
+ // If not_null_obj is dead, only null-path is taken
+ if (stopped()) { // Doing checkcast on a NULL?
+ set_control(null_ctl);
+ return null();
+ }
+ region->init_req(_null_path, null_ctl);
+ phi ->init_req(_null_path, null()); // Set null path value
+
+ Node* cast_obj = NULL; // the casted version of the object
+
+ // If the profile has seen exactly one type, narrow to that type.
+ // (The subsequent subtype check will always fold up.)
+ if (UseTypeProfile && TypeProfileCasts && data != NULL &&
+ // Counter has never been decremented (due to cast failure).
+ // ...This is a reasonable thing to expect. It is true of
+ // all casts inserted by javac to implement generic types.
+ data->as_CounterData()->count() >= 0 &&
+ !too_many_traps(Deoptimization::Reason_class_check)) {
+ // (No, this isn't a call, but it's enough like a virtual call
+ // to use the same ciMethod accessor to get the profile info...)
+ ciCallProfile profile = method()->call_profile_at_bci(bci());
+ if (profile.count() >= 0 && // no cast failures here
+ profile.has_receiver(0) &&
+ profile.morphism() == 1) {
+ ciKlass* exact_kls = profile.receiver(0);
+ int ssc = static_subtype_check(tk->klass(), exact_kls);
+ if (ssc == SSC_always_true) {
+ // If we narrow the type to match what the type profile sees,
+ // we can then remove the rest of the cast.
+ // This is a win, even if the exact_kls is very specific,
+ // because downstream operations, such as method calls,
+ // will often benefit from the sharper type.
+ Node* exact_obj = not_null_obj; // will get updated in place...
+ Node* slow_ctl = type_check_receiver(exact_obj, exact_kls, 1.0,
+ &exact_obj);
+ { PreserveJVMState pjvms(this);
+ set_control(slow_ctl);
+ uncommon_trap(Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ }
+ if (failure_control != NULL) // failure is now impossible
+ (*failure_control) = top();
+ replace_in_map(not_null_obj, exact_obj);
+ // adjust the type of the phi to the exact klass:
+ phi->raise_bottom_type(_gvn.type(exact_obj)->meet(TypePtr::NULL_PTR));
+ cast_obj = exact_obj;
+ }
+ // assert(cast_obj != NULL)... except maybe the profile lied to us.
+ }
+ }
+
+ if (cast_obj == NULL) {
+ // Load the object's klass
+ Node* obj_klass = load_object_klass(not_null_obj);
+
+ // Generate the subtype check
+ Node* not_subtype_ctrl = gen_subtype_check( obj_klass, superklass );
+
+ // Plug in success path into the merge
+ cast_obj = _gvn.transform(new (C, 2) CheckCastPPNode(control(),
+ not_null_obj, toop));
+ // Failure path ends in uncommon trap (or may be dead - failure impossible)
+ if (failure_control == NULL) {
+ if (not_subtype_ctrl != top()) { // If failure is possible
+ PreserveJVMState pjvms(this);
+ set_control(not_subtype_ctrl);
+ builtin_throw(Deoptimization::Reason_class_check, obj_klass);
+ }
+ } else {
+ (*failure_control) = not_subtype_ctrl;
+ }
+ }
+
+ region->init_req(_obj_path, control());
+ phi ->init_req(_obj_path, cast_obj);
+
+ // A merge of NULL or Casted-NotNull obj
+ Node* res = _gvn.transform(phi);
+
+ // Note I do NOT always 'replace_in_map(obj,result)' here.
+ // if( tk->klass()->can_be_primary_super() )
+ // This means that if I successfully store an Object into an array-of-String
+ // I 'forget' that the Object is really now known to be a String. I have to
+ // do this because we don't have true union types for interfaces - if I store
+ // a Baz into an array-of-Interface and then tell the optimizer it's an
+ // Interface, I forget that it's also a Baz and cannot do Baz-like field
+ // references to it. FIX THIS WHEN UNION TYPES APPEAR!
+ // replace_in_map( obj, res );
+
+ // Return final merged results
+ set_control( _gvn.transform(region) );
+ record_for_igvn(region);
+ return res;
+}
+
+//------------------------------next_monitor-----------------------------------
+// What number should be given to the next monitor?
+int GraphKit::next_monitor() {
+ int current = jvms()->monitor_depth()* C->sync_stack_slots();
+ int next = current + C->sync_stack_slots();
+ // Keep the toplevel high water mark current:
+ if (C->fixed_slots() < next) C->set_fixed_slots(next);
+ return current;
+}
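+
+// (E.g. with one sync stack slot per monitor, the first three monitors get
+// numbers 0, 1, 2 and fixed_slots is raised to at least 3.)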
+
+//------------------------------insert_mem_bar---------------------------------
+// Memory barrier to avoid floating things around
+// The membar serves as a pinch point between both control and all memory slices.
+Node* GraphKit::insert_mem_bar(int opcode, Node* precedent) {
+ MemBarNode* mb = MemBarNode::make(C, opcode, Compile::AliasIdxBot, precedent);
+ mb->init_req(TypeFunc::Control, control());
+ mb->init_req(TypeFunc::Memory, reset_memory());
+ Node* membar = _gvn.transform(mb);
+ set_control(_gvn.transform(new (C, 1) ProjNode(membar,TypeFunc::Control) ));
+ set_all_memory_call(membar);
+ return membar;
+}
+
+//-------------------------insert_mem_bar_volatile----------------------------
+// Memory barrier to avoid floating things around
+// The membar serves as a pinch point between both control and memory(alias_idx).
+// If you want to make a pinch point on all memory slices, do not use this
+// function (even with AliasIdxBot); use insert_mem_bar() instead.
+Node* GraphKit::insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent) {
+ // When Parse::do_put_xxx updates a volatile field, it appends a series
+ // of MemBarVolatile nodes, one for *each* volatile field alias category.
+ // The first membar is on the same memory slice as the field store opcode.
+ // This forces the membar to follow the store. (Bug 6500685 broke this.)
+ // All the other membars (for other volatile slices, including AliasIdxBot,
+ // which stands for all unknown volatile slices) are control-dependent
+ // on the first membar. This prevents later volatile loads or stores
+ // from sliding up past the just-emitted store.
+
+ MemBarNode* mb = MemBarNode::make(C, opcode, alias_idx, precedent);
+ mb->set_req(TypeFunc::Control,control());
+ if (alias_idx == Compile::AliasIdxBot) {
+ mb->set_req(TypeFunc::Memory, merged_memory()->base_memory());
+ } else {
+ assert(!(opcode == Op_Initialize && alias_idx != Compile::AliasIdxRaw), "fix caller");
+ mb->set_req(TypeFunc::Memory, memory(alias_idx));
+ }
+ Node* membar = _gvn.transform(mb);
+ set_control(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Control)));
+ if (alias_idx == Compile::AliasIdxBot) {
+ merged_memory()->set_base_memory(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Memory)));
+ } else {
+ set_memory(_gvn.transform(new (C, 1) ProjNode(membar, TypeFunc::Memory)),alias_idx);
+ }
+ return membar;
+}
+
+//------------------------------shared_lock------------------------------------
+// Emit locking code.
+FastLockNode* GraphKit::shared_lock(Node* obj) {
+ // bci is either a monitorenter bc or InvocationEntryBci
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+
+ if( !GenerateSynchronizationCode )
+ return NULL; // Not locking things?
+ if (stopped()) // Dead monitor?
+ return NULL;
+
+ assert(dead_locals_are_killed(), "should kill locals before sync. point");
+
+ // Box the stack location
+ Node* box = _gvn.transform(new (C, 1) BoxLockNode(next_monitor()));
+ Node* mem = reset_memory();
+
+ FastLockNode * flock = _gvn.transform(new (C, 3) FastLockNode(0, obj, box) )->as_FastLock();
+ if (PrintPreciseBiasedLockingStatistics) {
+ // Create the counters for this fast lock.
+ flock->create_lock_counter(sync_jvms()); // sync_jvms used to get current bci
+ }
+ // Add monitor to debug info for the slow path. If we block inside the
+ // slow path and de-opt, we need the monitor hanging around
+ map()->push_monitor( flock );
+
+ const TypeFunc *tf = LockNode::lock_type();
+ LockNode *lock = new (C, tf->domain()->cnt()) LockNode(C, tf);
+
+ lock->init_req( TypeFunc::Control, control() );
+ lock->init_req( TypeFunc::Memory , mem );
+ lock->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ lock->init_req( TypeFunc::FramePtr, frameptr() );
+ lock->init_req( TypeFunc::ReturnAdr, top() );
+
+ lock->init_req(TypeFunc::Parms + 0, obj);
+ lock->init_req(TypeFunc::Parms + 1, box);
+ lock->init_req(TypeFunc::Parms + 2, flock);
+ add_safepoint_edges(lock);
+
+ lock = _gvn.transform( lock )->as_Lock();
+
+ // lock has no side-effects, sets few values
+ set_predefined_output_for_runtime_call(lock, mem, TypeRawPtr::BOTTOM);
+
+ insert_mem_bar(Op_MemBarAcquire);
+
+ // Add this to the worklist so that the lock can be eliminated
+ record_for_igvn(lock);
+
+#ifndef PRODUCT
+ if (PrintLockStatistics) {
+ // Update the counter for this lock. Don't bother using an atomic
+ // operation since we don't require absolute accuracy.
+ lock->create_lock_counter(map()->jvms());
+ int adr_type = Compile::AliasIdxRaw;
+ Node* counter_addr = makecon(TypeRawPtr::make(lock->counter()->addr()));
+ Node* cnt = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory(control(), counter_addr, incr, T_INT, adr_type);
+ }
+#endif
+
+ return flock;
+}
+
+
+//------------------------------shared_unlock----------------------------------
+// Emit unlocking code.
+void GraphKit::shared_unlock(Node* box, Node* obj) {
+ // bci is either a monitorenter bc or InvocationEntryBci
+ // %%% SynchronizationEntryBCI is redundant; use InvocationEntryBci in interfaces
+ assert(SynchronizationEntryBCI == InvocationEntryBci, "");
+
+ if( !GenerateSynchronizationCode )
+ return;
+ if (stopped()) { // Dead monitor?
+ map()->pop_monitor(); // Kill monitor from debug info
+ return;
+ }
+
+ // Memory barrier to avoid floating things down past the locked region
+ insert_mem_bar(Op_MemBarRelease);
+
+ const TypeFunc *tf = OptoRuntime::complete_monitor_exit_Type();
+ UnlockNode *unlock = new (C, tf->domain()->cnt()) UnlockNode(C, tf);
+ uint raw_idx = Compile::AliasIdxRaw;
+ unlock->init_req( TypeFunc::Control, control() );
+ unlock->init_req( TypeFunc::Memory , memory(raw_idx) );
+ unlock->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ unlock->init_req( TypeFunc::FramePtr, frameptr() );
+ unlock->init_req( TypeFunc::ReturnAdr, top() );
+
+ unlock->init_req(TypeFunc::Parms + 0, obj);
+ unlock->init_req(TypeFunc::Parms + 1, box);
+ unlock = _gvn.transform(unlock)->as_Unlock();
+
+ Node* mem = reset_memory();
+
+ // unlock has no side-effects, sets few values
+ set_predefined_output_for_runtime_call(unlock, mem, TypeRawPtr::BOTTOM);
+
+ // Kill monitor from debug info
+ map()->pop_monitor( );
+}
+
+//-------------------------------get_layout_helper-----------------------------
+// If the given klass is a constant or known to be an array,
+// fetch the constant layout helper value into constant_value
+// and return (Node*)NULL. Otherwise, load the non-constant
+// layout helper value, and return the node which represents it.
+// This two-faced routine is useful because allocation sites
+// almost always feature constant types.
+Node* GraphKit::get_layout_helper(Node* klass_node, jint& constant_value) {
+ const TypeKlassPtr* inst_klass = _gvn.type(klass_node)->isa_klassptr();
+ if (!StressReflectiveCode && inst_klass != NULL) {
+ ciKlass* klass = inst_klass->klass();
+ bool xklass = inst_klass->klass_is_exact();
+ if (xklass || klass->is_array_klass()) {
+ jint lhelper = klass->layout_helper();
+ if (lhelper != Klass::_lh_neutral_value) {
+ constant_value = lhelper;
+ return (Node*) NULL;
+ }
+ }
+ }
+ constant_value = Klass::_lh_neutral_value; // put in a known value
+ Node* lhp = basic_plus_adr(klass_node, klass_node, Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));
+ return make_load(NULL, lhp, TypeInt::INT, T_INT);
+}
+
+// We just put in an allocate/initialize with a big raw-memory effect.
+// Hook selected additional alias categories on the initialization.
+static void hook_memory_on_init(GraphKit& kit, int alias_idx,
+ MergeMemNode* init_in_merge,
+ Node* init_out_raw) {
+ DEBUG_ONLY(Node* init_in_raw = init_in_merge->base_memory());
+ assert(init_in_merge->memory_at(alias_idx) == init_in_raw, "");
+
+ Node* prevmem = kit.memory(alias_idx);
+ init_in_merge->set_memory_at(alias_idx, prevmem);
+ kit.set_memory(init_out_raw, alias_idx);
+}
+
+//---------------------------set_output_for_allocation-------------------------
+Node* GraphKit::set_output_for_allocation(AllocateNode* alloc,
+ const TypeOopPtr* oop_type,
+ bool raw_mem_only) {
+ int rawidx = Compile::AliasIdxRaw;
+ alloc->set_req( TypeFunc::FramePtr, frameptr() );
+ add_safepoint_edges(alloc);
+ Node* allocx = _gvn.transform(alloc);
+ set_control( _gvn.transform(new (C, 1) ProjNode(allocx, TypeFunc::Control) ) );
+ // create memory projection for i_o
+ set_memory ( _gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::Memory, true) ), rawidx );
+ make_slow_call_ex(allocx, env()->OutOfMemoryError_klass(), true);
+
+ // create a memory projection as for the normal control path
+ Node* malloc = _gvn.transform(new (C, 1) ProjNode(allocx, TypeFunc::Memory));
+ set_memory(malloc, rawidx);
+
+ // a normal slow-call doesn't change i_o, but an allocation does
+ // we create a separate i_o projection for the normal control path
+ set_i_o(_gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::I_O, false) ) );
+ Node* rawoop = _gvn.transform( new (C, 1) ProjNode(allocx, TypeFunc::Parms) );
+
+ // put in an initialization barrier
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, rawidx,
+ rawoop)->as_Initialize();
+ assert(alloc->initialization() == init, "2-way macro link must work");
+ assert(init ->allocation() == alloc, "2-way macro link must work");
+ if (ReduceFieldZeroing && !raw_mem_only) {
+ // Extract memory strands which may participate in the new object's
+ // initialization, and source them from the new InitializeNode.
+ // This will allow us to observe initializations when they occur,
+ // and link them properly (as a group) to the InitializeNode.
+ Node* klass_node = alloc->in(AllocateNode::KlassNode);
+ assert(init->in(InitializeNode::Memory) == malloc, "");
+ MergeMemNode* minit_in = MergeMemNode::make(C, malloc);
+ init->set_req(InitializeNode::Memory, minit_in);
+ record_for_igvn(minit_in); // fold it up later, if possible
+ Node* minit_out = memory(rawidx);
+ assert(minit_out->is_Proj() && minit_out->in(0) == init, "");
+ if (oop_type->isa_aryptr()) {
+ const TypePtr* telemref = oop_type->add_offset(Type::OffsetBot);
+ int elemidx = C->get_alias_index(telemref);
+ hook_memory_on_init(*this, elemidx, minit_in, minit_out);
+ } else if (oop_type->isa_instptr()) {
+ ciInstanceKlass* ik = oop_type->klass()->as_instance_klass();
+ for (int i = 0, len = ik->nof_nonstatic_fields(); i < len; i++) {
+ ciField* field = ik->nonstatic_field_at(i);
+ if (field->offset() >= TrackedInitializationLimit)
+ continue; // do not bother to track really large numbers of fields
+ // Find (or create) the alias category for this field:
+ int fieldidx = C->alias_type(field)->index();
+ hook_memory_on_init(*this, fieldidx, minit_in, minit_out);
+ }
+ }
+ }
+
+ // Cast raw oop to the real thing...
+ Node* javaoop = new (C, 2) CheckCastPPNode(control(), rawoop, oop_type);
+ javaoop = _gvn.transform(javaoop);
+ C->set_recent_alloc(control(), javaoop);
+ assert(just_allocated_object(control()) == javaoop, "just allocated");
+
+#ifdef ASSERT
+ { // Verify that the AllocateNode::Ideal_foo recognizers work:
+ Node* kn = alloc->in(AllocateNode::KlassNode);
+ Node* ln = alloc->in(AllocateNode::ALength);
+ assert(AllocateNode::Ideal_klass(rawoop, &_gvn) == kn,
+ "Ideal_klass works");
+ assert(AllocateNode::Ideal_klass(javaoop, &_gvn) == kn,
+ "Ideal_klass works");
+ if (alloc->is_AllocateArray()) {
+ assert(AllocateArrayNode::Ideal_length(rawoop, &_gvn) == ln,
+ "Ideal_length works");
+ assert(AllocateArrayNode::Ideal_length(javaoop, &_gvn) == ln,
+ "Ideal_length works");
+ } else {
+ assert(ln->is_top(), "no length, please");
+ }
+ }
+#endif //ASSERT
+
+ return javaoop;
+}
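+// Shape of the subgraph built above (sketch only; projections are listed by
+// their TypeFunc slot, not by their exact constructor arguments):
+//
+//   AllocateNode
+//     +-- Proj(Control)            --> becomes control()
+//     +-- Proj(Memory)  [io path]  --> raw memory consumed by make_slow_call_ex
+//     +-- Proj(Memory)  = malloc   --> feeds the InitializeNode
+//     +-- Proj(I_O)                --> becomes i_o()
+//     +-- Proj(Parms)   = rawoop   --> Initialize --> CheckCastPP = javaoop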
+
+//---------------------------new_instance--------------------------------------
+// This routine takes a klass_node which may be constant (for a static type)
+// or may be non-constant (for reflective code). It will work equally well
+// for either, and the graph will fold nicely if the optimizer later reduces
+// the type to a constant.
+// The optional arguments are for specialized use by intrinsics:
+// - If 'extra_slow_test' is not null, it is an extra condition for the slow path.
+// - If 'raw_mem_only', do not cast the result to an oop.
+// - If 'return_size_val', report the total object size to the caller.
+Node* GraphKit::new_instance(Node* klass_node,
+ Node* extra_slow_test,
+ bool raw_mem_only, // affect only raw memory
+ Node* *return_size_val) {
+ // Compute size in doublewords
+ // The size is always an integral number of doublewords, represented
+ // as a positive bytewise size stored in the klass's layout_helper.
+ // The layout_helper also encodes (in a low bit) the need for a slow path.
+ jint layout_con = Klass::_lh_neutral_value;
+ Node* layout_val = get_layout_helper(klass_node, layout_con);
+ int layout_is_con = (layout_val == NULL);
+
+ if (extra_slow_test == NULL) extra_slow_test = intcon(0);
+ // Generate the initial go-slow test. It's either ALWAYS (return a
+ // Node for 1) or NEVER (return a NULL) or perhaps (in the reflective
+ // case) a computed value derived from the layout_helper.
+ Node* initial_slow_test = NULL;
+ if (layout_is_con) {
+ assert(!StressReflectiveCode, "stress mode does not use these paths");
+ bool must_go_slow = Klass::layout_helper_needs_slow_path(layout_con);
+ initial_slow_test = must_go_slow? intcon(1): extra_slow_test;
+
+ } else { // reflective case
+ // This reflective path is used by Unsafe.allocateInstance.
+ // (It may be stress-tested by specifying StressReflectiveCode.)
+ // Basically, we want to get into the VM if there's an illegal argument.
+ Node* bit = intcon(Klass::_lh_instance_slow_path_bit);
+ initial_slow_test = _gvn.transform( new (C, 3) AndINode(layout_val, bit) );
+ if (extra_slow_test != intcon(0)) {
+ initial_slow_test = _gvn.transform( new (C, 3) OrINode(initial_slow_test, extra_slow_test) );
+ }
+ // (Macro-expander will further convert this to a Bool, if necessary.)
+ }
+
+ // Find the size in bytes. This is easy; it's the layout_helper.
+ // The size value must be valid even if the slow path is taken.
+ Node* size = NULL;
+ if (layout_is_con) {
+ size = MakeConX(Klass::layout_helper_size_in_bytes(layout_con));
+ } else { // reflective case
+ // This reflective path is used by clone and Unsafe.allocateInstance.
+ size = ConvI2X(layout_val);
+
+ // Clear the low bits to extract layout_helper_size_in_bytes:
+ assert((int)Klass::_lh_instance_slow_path_bit < BytesPerLong, "clear bit");
+ Node* mask = MakeConX(~ (intptr_t)right_n_bits(LogBytesPerLong));
+ size = _gvn.transform( new (C, 3) AndXNode(size, mask) );
+ }
+ if (return_size_val != NULL) {
+ (*return_size_val) = size;
+ }
+
+ // This is a precise notnull oop of the klass.
+ // (Actually, it need not be precise if this is a reflective allocation.)
+ // It's what we cast the result to.
+ const TypeKlassPtr* tklass = _gvn.type(klass_node)->isa_klassptr();
+ if (!tklass) tklass = TypeKlassPtr::OBJECT;
+ const TypeOopPtr* oop_type = tklass->as_instance_type();
+
+ // Now generate allocation code
+ AllocateNode* alloc
+ = new (C, AllocateNode::ParmLimit)
+ AllocateNode(C, AllocateNode::alloc_type(),
+ control(), memory(Compile::AliasIdxRaw), i_o(),
+ size, klass_node,
+ initial_slow_test);
+
+ return set_output_for_allocation(alloc, oop_type, raw_mem_only);
+}
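+// Example use (illustrative sketch; 'kit' and 'klass_node' stand for whatever
+// GraphKit and klass pointer the caller already has):
+//
+//   Node* size = NULL;
+//   Node* obj  = kit.new_instance(klass_node, /*extra_slow_test*/ NULL,
+//                                 /*raw_mem_only*/ false, &size);
+//   // 'obj' is the CheckCastPP'd oop; 'size' reports the object size in bytes.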
+
+//-------------------------------new_array-------------------------------------
+// helper for both newarray and anewarray
+// The 'length' parameter is (obviously) the length of the array.
+// See comments on new_instance for the meaning of the other arguments.
+Node* GraphKit::new_array(Node* klass_node, // array klass (maybe variable)
+ Node* length, // number of array elements
+ bool raw_mem_only, // affect only raw memory
+ Node* *return_size_val) {
+ jint layout_con = Klass::_lh_neutral_value;
+ Node* layout_val = get_layout_helper(klass_node, layout_con);
+ int layout_is_con = (layout_val == NULL);
+
+ if (!layout_is_con && !StressReflectiveCode &&
+ !too_many_traps(Deoptimization::Reason_class_check)) {
+ // This is a reflective array creation site.
+ // Optimistically assume that it is a subtype of Object[],
+ // so that we can fold up all the address arithmetic.
+ layout_con = Klass::array_layout_helper(T_OBJECT);
+ Node* cmp_lh = _gvn.transform( new(C, 3) CmpINode(layout_val, intcon(layout_con)) );
+ Node* bol_lh = _gvn.transform( new(C, 2) BoolNode(cmp_lh, BoolTest::eq) );
+ { BuildCutout unless(this, bol_lh, PROB_MAX);
+ uncommon_trap(Deoptimization::Reason_class_check,
+ Deoptimization::Action_maybe_recompile);
+ }
+ layout_val = NULL;
+ layout_is_con = true;
+ }
+
+ // Generate the initial go-slow test. Make sure we do not overflow
+ // if length is huge (near 2Gig) or negative! We do not need
+ // exact double-words here, just a close approximation of needed
+ // double-words. We can't add any offset or rounding bits, lest we
+// take a size of -1 bytes and make it positive. Use an unsigned
+ // compare, so negative sizes look hugely positive.
+ int fast_size_limit = FastAllocateSizeLimit;
+ if (layout_is_con) {
+ assert(!StressReflectiveCode, "stress mode does not use these paths");
+ // Increase the size limit if we have exact knowledge of array type.
+ int log2_esize = Klass::layout_helper_log2_element_size(layout_con);
+ fast_size_limit <<= (LogBytesPerLong - log2_esize);
+ }
+
+ Node* initial_slow_cmp = _gvn.transform( new (C, 3) CmpUNode( length, intcon( fast_size_limit ) ) );
+ Node* initial_slow_test = _gvn.transform( new (C, 2) BoolNode( initial_slow_cmp, BoolTest::gt ) );
+ if (initial_slow_test->is_Bool()) {
+ // Hide it behind a CMoveI, or else PhaseIdealLoop::split_up will get sick.
+ initial_slow_test = initial_slow_test->as_Bool()->as_int_value(&_gvn);
+ }
+
+ // --- Size Computation ---
+ // array_size = round_to_heap(array_header + (length << elem_shift));
+ // where round_to_heap(x) == round_to(x, MinObjAlignmentInBytes)
+ // and round_to(x, y) == ((x + y-1) & ~(y-1))
+ // The rounding mask is strength-reduced, if possible.
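+ // For example (illustration only, assuming MinObjAlignmentInBytes == 8 and a
+ // 16-byte array header): a 10-element array with eshift == 2 needs
+ //   round_to(16 + (10 << 2), 8) == round_to(56, 8) == 56 bytes.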
+ int round_mask = MinObjAlignmentInBytes - 1;
+ Node* header_size = NULL;
+ int header_size_min = arrayOopDesc::base_offset_in_bytes(T_BYTE);
+ // (T_BYTE has the weakest alignment and size restrictions...)
+ if (layout_is_con) {
+ int hsize = Klass::layout_helper_header_size(layout_con);
+ int eshift = Klass::layout_helper_log2_element_size(layout_con);
+ BasicType etype = Klass::layout_helper_element_type(layout_con);
+ if ((round_mask & ~right_n_bits(eshift)) == 0)
+ round_mask = 0; // strength-reduce it if it goes away completely
+ assert((hsize & right_n_bits(eshift)) == 0, "hsize is pre-rounded");
+ assert(header_size_min <= hsize, "generic minimum is smallest");
+ header_size_min = hsize;
+ header_size = intcon(hsize + round_mask);
+ } else {
+ Node* hss = intcon(Klass::_lh_header_size_shift);
+ Node* hsm = intcon(Klass::_lh_header_size_mask);
+ Node* hsize = _gvn.transform( new(C, 3) URShiftINode(layout_val, hss) );
+ hsize = _gvn.transform( new(C, 3) AndINode(hsize, hsm) );
+ Node* mask = intcon(round_mask);
+ header_size = _gvn.transform( new(C, 3) AddINode(hsize, mask) );
+ }
+
+ Node* elem_shift = NULL;
+ if (layout_is_con) {
+ int eshift = Klass::layout_helper_log2_element_size(layout_con);
+ if (eshift != 0)
+ elem_shift = intcon(eshift);
+ } else {
+ // There is no need to mask or shift this value.
+ // The semantics of LShiftINode include an implicit mask to 0x1F.
+ assert(Klass::_lh_log2_element_size_shift == 0, "use shift in place");
+ elem_shift = layout_val;
+ }
+
+ // Transition to native address size for all offset calculations:
+ Node* lengthx = ConvI2X(length);
+ Node* headerx = ConvI2X(header_size);
+#ifdef _LP64
+ { const TypeLong* tllen = _gvn.find_long_type(lengthx);
+ if (tllen != NULL && tllen->_lo < 0) {
+ // Add a manual constraint to a positive range. Cf. array_element_address.
+ jlong size_max = arrayOopDesc::max_array_length(T_BYTE);
+ if (size_max > tllen->_hi) size_max = tllen->_hi;
+ const TypeLong* tlcon = TypeLong::make(CONST64(0), size_max, Type::WidenMin);
+ lengthx = _gvn.transform( new (C, 2) ConvI2LNode(length, tlcon));
+ }
+ }
+#endif
+
+ // Combine header size (plus rounding) and body size. Then round down.
+ // This computation cannot overflow, because it is used only in two
+ // places, one where the length is sharply limited, and the other
+ // after a successful allocation.
+ Node* abody = lengthx;
+ if (elem_shift != NULL)
+ abody = _gvn.transform( new(C, 3) LShiftXNode(lengthx, elem_shift) );
+ Node* size = _gvn.transform( new(C, 3) AddXNode(headerx, abody) );
+ if (round_mask != 0) {
+ Node* mask = MakeConX(~round_mask);
+ size = _gvn.transform( new(C, 3) AndXNode(size, mask) );
+ }
+ // else if round_mask == 0, the size computation is self-rounding
+
+ if (return_size_val != NULL) {
+ // This is the size
+ (*return_size_val) = size;
+ }
+
+ // Now generate allocation code
+ // Create the AllocateArrayNode and its result projections
+ AllocateArrayNode* alloc
+ = new (C, AllocateArrayNode::ParmLimit)
+ AllocateArrayNode(C, AllocateArrayNode::alloc_type(),
+ control(), memory(Compile::AliasIdxRaw), i_o(),
+ size, klass_node,
+ initial_slow_test,
+ length);
+
+ // Cast to correct type. Note that the klass_node may be constant or not,
+ // and in the latter case the actual array type will be inexact also.
+ // (This happens via a non-constant argument to inline_native_newArray.)
+ // In any case, the value of klass_node provides the desired array type.
+ const TypeInt* length_type = _gvn.find_int_type(length);
+ const TypeInt* narrow_length_type = NULL;
+ const TypeOopPtr* ary_type = _gvn.type(klass_node)->is_klassptr()->as_instance_type();
+ if (ary_type->isa_aryptr() && length_type != NULL) {
+ // Try to get a better type than POS for the size
+ ary_type = ary_type->is_aryptr()->cast_to_size(length_type);
+ narrow_length_type = ary_type->is_aryptr()->size();
+ if (narrow_length_type == length_type)
+ narrow_length_type = NULL;
+ }
+
+ Node* javaoop = set_output_for_allocation(alloc, ary_type, raw_mem_only);
+
+ // Cast length on remaining path to be positive:
+ if (narrow_length_type != NULL) {
+ Node* ccast = new (C, 2) CastIINode(length, narrow_length_type);
+ ccast->set_req(0, control());
+ _gvn.set_type_bottom(ccast);
+ record_for_igvn(ccast);
+ if (map()->find_edge(length) >= 0) {
+ replace_in_map(length, ccast);
+ }
+ }
+
+ return javaoop;
+}
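+// Example use (illustrative sketch; 'kit', 'klass_node' and 'length' stand for
+// values the caller already has):
+//
+//   Node* size = NULL;
+//   Node* ary  = kit.new_array(klass_node, length,
+//                              /*raw_mem_only*/ false, &size);
+//   // 'ary' is the new array oop; on the fall-through path 'length' has been
+//   // narrowed to a non-negative range in the map (see the CastII above).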
+
+// The following "Ideal_foo" functions are placed here because they recognize
+// the graph shapes created by the functions immediately above.
+
+//---------------------------Ideal_allocation----------------------------------
+// Given an oop pointer or raw pointer, see if it feeds from an AllocateNode.
+AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase) {
+ if (ptr == NULL) { // reduce dumb test in callers
+ return NULL;
+ }
+ if (ptr->is_CheckCastPP()) { // strip a raw-to-oop cast
+ ptr = ptr->in(1);
+ if (ptr == NULL) return NULL;
+ }
+ if (ptr->is_Proj()) {
+ Node* allo = ptr->in(0);
+ if (allo != NULL && allo->is_Allocate()) {
+ return allo->as_Allocate();
+ }
+ }
+ // Report failure to match.
+ return NULL;
+}
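+// Recognized shape (sketch):
+//
+//   AllocateNode --> Proj(Parms) --> [CheckCastPP] --> ptr
+//
+// i.e. 'ptr' may be either the raw-oop projection itself or the CheckCastPP'd
+// java oop; both trace back to the same AllocateNode.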
+
+// Fancy version which also strips off an offset (and reports it to caller).
+AllocateNode* AllocateNode::Ideal_allocation(Node* ptr, PhaseTransform* phase,
+ intptr_t& offset) {
+ Node* base = AddPNode::Ideal_base_and_offset(ptr, phase, offset);
+ if (base == NULL) return NULL;
+ return Ideal_allocation(base, phase);
+}
+
+// Trace Initialize <- Proj[Parm] <- Allocate
+AllocateNode* InitializeNode::allocation() {
+ Node* rawoop = in(InitializeNode::RawAddress);
+ if (rawoop->is_Proj()) {
+ Node* alloc = rawoop->in(0);
+ if (alloc->is_Allocate()) {
+ return alloc->as_Allocate();
+ }
+ }
+ return NULL;
+}
+
+// Trace Allocate -> Proj[Parm] -> Initialize
+InitializeNode* AllocateNode::initialization() {
+ ProjNode* rawoop = proj_out(AllocateNode::RawAddress);
+ if (rawoop == NULL) return NULL;
+ for (DUIterator_Fast imax, i = rawoop->fast_outs(imax); i < imax; i++) {
+ Node* init = rawoop->fast_out(i);
+ if (init->is_Initialize()) {
+ assert(init->as_Initialize()->allocation() == this, "2-way link");
+ return init->as_Initialize();
+ }
+ }
+ return NULL;
+}
diff --git a/src/share/vm/opto/graphKit.hpp b/src/share/vm/opto/graphKit.hpp
new file mode 100644
index 000000000..c9ea02625
--- /dev/null
+++ b/src/share/vm/opto/graphKit.hpp
@@ -0,0 +1,720 @@
+/*
+ * Copyright 2001-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class FastLockNode;
+class FastUnlockNode;
+class Parse;
+class RootNode;
+
+//-----------------------------------------------------------------------------
+//----------------------------GraphKit-----------------------------------------
+// Toolkit for building the common sorts of subgraphs.
+// Does not know about bytecode parsing or type-flow results.
+// It is able to create graphs implementing the semantics of most
+// or all bytecodes, so that it can expand intrinsics and calls.
+// It may depend on JVMState structure, but it must not depend
+// on specific bytecode streams.
+class GraphKit : public Phase {
+ friend class PreserveJVMState;
+
+ protected:
+ ciEnv* _env; // Compilation environment
+ PhaseGVN &_gvn; // Some optimizations while parsing
+ SafePointNode* _map; // Parser map from JVM to Nodes
+ SafePointNode* _exceptions;// Parser map(s) for exception state(s)
+ int _sp; // JVM Expression Stack Pointer
+ int _bci; // JVM Bytecode Pointer
+ ciMethod* _method; // JVM Current Method
+
+ private:
+ SafePointNode* map_not_null() const {
+ assert(_map != NULL, "must call stopped() to test for reset compiler map");
+ return _map;
+ }
+
+ public:
+ GraphKit(); // empty constructor
+ GraphKit(JVMState* jvms); // the JVM state on which to operate
+
+#ifdef ASSERT
+ ~GraphKit() {
+ assert(!has_exceptions(), "user must call transfer_exceptions_into_jvms");
+ }
+#endif
+
+ virtual Parse* is_Parse() const { return NULL; }
+
+ ciEnv* env() const { return _env; }
+ PhaseGVN& gvn() const { return _gvn; }
+
+ void record_for_igvn(Node* n) const { C->record_for_igvn(n); } // delegate to Compile
+
+ // Handy well-known nodes:
+ Node* null() const { return zerocon(T_OBJECT); }
+ Node* top() const { return C->top(); }
+ RootNode* root() const { return C->root(); }
+
+ // Create or find a constant node
+ Node* intcon(jint con) const { return _gvn.intcon(con); }
+ Node* longcon(jlong con) const { return _gvn.longcon(con); }
+ Node* makecon(const Type *t) const { return _gvn.makecon(t); }
+ Node* zerocon(BasicType bt) const { return _gvn.zerocon(bt); }
+ // (See also macro MakeConX in type.hpp, which uses intcon or longcon.)
+
+ jint find_int_con(Node* n, jint value_if_unknown) {
+ return _gvn.find_int_con(n, value_if_unknown);
+ }
+ jlong find_long_con(Node* n, jlong value_if_unknown) {
+ return _gvn.find_long_con(n, value_if_unknown);
+ }
+ // (See also macro find_intptr_t_con in type.hpp, which uses one of these.)
+
+ // JVM State accessors:
+ // Parser mapping from JVM indices into Nodes.
+ // Low slots are accessed by the StartNode enum values.
+ // Then come the locals at StartNode::Parms to StartNode::Parms+max_locals();
+ // Then come JVM stack slots.
+ // Finally come the monitors, if any.
+ // See layout accessors in class JVMState.
+
+ SafePointNode* map() const { return _map; }
+ bool has_exceptions() const { return _exceptions != NULL; }
+ JVMState* jvms() const { return map_not_null()->_jvms; }
+ int sp() const { return _sp; }
+ int bci() const { return _bci; }
+ Bytecodes::Code java_bc() const;
+ ciMethod* method() const { return _method; }
+
+ void set_jvms(JVMState* jvms) { set_map(jvms->map());
+ assert(jvms == this->jvms(), "sanity");
+ _sp = jvms->sp();
+ _bci = jvms->bci();
+ _method = jvms->has_method() ? jvms->method() : NULL; }
+ void set_map(SafePointNode* m) { _map = m; debug_only(verify_map()); }
+ void set_sp(int i) { assert(i >= 0, "must be non-negative"); _sp = i; }
+ void clean_stack(int from_sp); // clear garbage beyond from_sp to top
+
+ void inc_sp(int i) { set_sp(sp() + i); }
+ void set_bci(int bci) { _bci = bci; }
+
+ // Make sure jvms has current bci & sp.
+ JVMState* sync_jvms() const;
+#ifdef ASSERT
+ // Make sure JVMS has an updated copy of bci and sp.
+ // Also sanity-check method, depth, and monitor depth.
+ bool jvms_in_sync() const;
+
+ // Make sure the map looks OK.
+ void verify_map() const;
+
+ // Make sure a proposed exception state looks OK.
+ static void verify_exception_state(SafePointNode* ex_map);
+#endif
+
+ // Clone the existing map state. (Implements PreserveJVMState.)
+ SafePointNode* clone_map();
+
+ // Set the map to a clone of the given one.
+ void set_map_clone(SafePointNode* m);
+
+ // Tell if the compilation is failing.
+ bool failing() const { return C->failing(); }
+
+ // Set _map to NULL, signalling a stop to further bytecode execution.
+ // Preserve the map intact for future use, and return it back to the caller.
+ SafePointNode* stop() { SafePointNode* m = map(); set_map(NULL); return m; }
+
+ // Stop, but first smash the map's inputs to NULL, to mark it dead.
+ void stop_and_kill_map();
+
+ // Tell if _map is NULL, or control is top.
+ bool stopped();
+
+ // Tell if this method or any caller method has exception handlers.
+ bool has_ex_handler();
+
+ // Save an exception without blowing stack contents or other JVM state.
+ // (The extra pointer is stuck with add_req on the map, beyond the JVMS.)
+ static void set_saved_ex_oop(SafePointNode* ex_map, Node* ex_oop);
+
+ // Recover a saved exception from its map.
+ static Node* saved_ex_oop(SafePointNode* ex_map);
+
+ // Recover a saved exception from its map, and remove it from the map.
+ static Node* clear_saved_ex_oop(SafePointNode* ex_map);
+
+#ifdef ASSERT
+ // Report whether there is a saved exception oop in the given map.
+ static bool has_saved_ex_oop(SafePointNode* ex_map);
+#endif
+
+ // Push an exception in the canonical position for handlers (stack(0)).
+ void push_ex_oop(Node* ex_oop) {
+ ensure_stack(1); // ensure room to push the exception
+ set_stack(0, ex_oop);
+ set_sp(1);
+ clean_stack(1);
+ }
+
+ // Detach and return an exception state.
+ SafePointNode* pop_exception_state() {
+ SafePointNode* ex_map = _exceptions;
+ if (ex_map != NULL) {
+ _exceptions = ex_map->next_exception();
+ ex_map->set_next_exception(NULL);
+ debug_only(verify_exception_state(ex_map));
+ }
+ return ex_map;
+ }
+
+ // Add an exception, using the given JVM state, without commoning.
+ void push_exception_state(SafePointNode* ex_map) {
+ debug_only(verify_exception_state(ex_map));
+ ex_map->set_next_exception(_exceptions);
+ _exceptions = ex_map;
+ }
+
+ // Turn the current JVM state into an exception state, appending the ex_oop.
+ SafePointNode* make_exception_state(Node* ex_oop);
+
+ // Add an exception, using the given JVM state.
+ // Combine all exceptions with a common exception type into a single state.
+ // (This is done via combine_exception_states.)
+ void add_exception_state(SafePointNode* ex_map);
+
+ // Combine all exceptions of any sort whatever into a single master state.
+ SafePointNode* combine_and_pop_all_exception_states() {
+ if (_exceptions == NULL) return NULL;
+ SafePointNode* phi_map = pop_exception_state();
+ SafePointNode* ex_map;
+ while ((ex_map = pop_exception_state()) != NULL) {
+ combine_exception_states(ex_map, phi_map);
+ }
+ return phi_map;
+ }
+
+ // Combine the two exception states, building phis as necessary.
+ // The second argument is updated to include contributions from the first.
+ void combine_exception_states(SafePointNode* ex_map, SafePointNode* phi_map);
+
+ // Reset the map to the given state. If there are any half-finished phis
+ // in it (created by combine_exception_states), transform them now.
+ // Returns the exception oop. (Caller must call push_ex_oop if required.)
+ Node* use_exception_state(SafePointNode* ex_map);
+
+ // Collect exceptions from a given JVM state into my exception list.
+ void add_exception_states_from(JVMState* jvms);
+
+ // Collect all raised exceptions into the current JVM state.
+ // Clear the current exception list and map, returns the combined states.
+ JVMState* transfer_exceptions_into_jvms();
+
+ // Helper to throw a built-in exception.
+ // Range checks take the offending index.
+ // Cast and array store checks take the offending class.
+ // Others do not take the optional argument.
+ // The JVMS must allow the bytecode to be re-executed
+ // via an uncommon trap.
+ void builtin_throw(Deoptimization::DeoptReason reason, Node* arg = NULL);
+
+ // Helper Functions for adding debug information
+ void kill_dead_locals();
+#ifdef ASSERT
+ bool dead_locals_are_killed();
+#endif
+ // The call may deoptimize. Supply required JVM state as debug info.
+ // If must_throw is true, the call is guaranteed not to return normally.
+ void add_safepoint_edges(SafePointNode* call,
+ bool must_throw = false);
+
+ // How many stack inputs does the current BC consume?
+ // And, how does the stack change after the bytecode?
+ // Returns false if unknown.
+ bool compute_stack_effects(int& inputs, int& depth);
+
+ // Add a fixed offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset) {
+ return basic_plus_adr(base, ptr, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, intptr_t offset) {
+ return basic_plus_adr(base, base, MakeConX(offset));
+ }
+ // Add a variable offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* offset) {
+ return basic_plus_adr(base, base, offset);
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, Node* offset);
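+  // Example (taken from GraphKit::get_layout_helper in graphKit.cpp): the
+  // address of a klass-relative field is formed as
+  //   Node* lhp = basic_plus_adr(klass_node, klass_node,
+  //                              Klass::layout_helper_offset_in_bytes() + sizeof(oopDesc));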
+
+ // Convert between int and long, and size_t.
+ // (See macros ConvI2X, etc., in type.hpp for ConvI2X, etc.)
+ Node* ConvI2L(Node* offset);
+ Node* ConvL2I(Node* offset);
+ // Find out the klass of an object.
+ Node* load_object_klass(Node* object);
+ // Find out the length of an array.
+ Node* load_array_length(Node* array);
+ // Helper function to do a NULL pointer check or ZERO check based on type.
+ Node* null_check_common(Node* value, BasicType type,
+ bool assert_null, Node* *null_control);
+ // Throw an exception if a given value is null.
+ // Return the value cast to not-null.
+ // Be clever about equivalent dominating null checks.
+ Node* do_null_check(Node* value, BasicType type) {
+ return null_check_common(value, type, false, NULL);
+ }
+ // Throw an uncommon trap if a given value is __not__ null.
+ // Return the value cast to null, and be clever about dominating checks.
+ Node* do_null_assert(Node* value, BasicType type) {
+ return null_check_common(value, type, true, NULL);
+ }
+ // Null check oop. Return null-path control into (*null_control).
+ // Return a cast-not-null node which depends on the not-null control.
+ // If never_see_null, use an uncommon trap (*null_control sees a top).
+ // The cast is not valid along the null path; keep a copy of the original.
+ Node* null_check_oop(Node* value, Node* *null_control,
+ bool never_see_null = false);
+
+ // Cast obj to not-null on this path
+ Node* cast_not_null(Node* obj, bool do_replace_in_map = true);
+ // Replace all occurrences of one node by another.
+ void replace_in_map(Node* old, Node* neww);
+
+ void push(Node* n) { map_not_null(); _map->set_stack(_map->_jvms,_sp++,n); }
+ Node* pop() { map_not_null(); return _map->stack(_map->_jvms,--_sp); }
+ Node* peek(int off=0) { map_not_null(); return _map->stack(_map->_jvms, _sp - off - 1); }
+
+ void push_pair(Node* ldval) {
+ push(ldval);
+ push(top()); // the halfword is merely a placeholder
+ }
+ void push_pair_local(int i) {
+ // longs are stored in locals in "push" order
+ push( local(i+0) ); // the real value
+ assert(local(i+1) == top(), "");
+ push(top()); // halfword placeholder
+ }
+ Node* pop_pair() {
+ // the second half is pushed last & popped first; it contains exactly nothing
+ Node* halfword = pop();
+ assert(halfword == top(), "");
+ // the long bits are pushed first & popped last:
+ return pop();
+ }
+ void set_pair_local(int i, Node* lval) {
+ // longs are stored in locals as a value/half pair (like doubles)
+ set_local(i+0, lval);
+ set_local(i+1, top());
+ }
+
+ // Push the node, which may be zero, one, or two words.
+ void push_node(BasicType n_type, Node* n) {
+ int n_size = type2size[n_type];
+ if (n_size == 1) push( n ); // T_INT, ...
+ else if (n_size == 2) push_pair( n ); // T_DOUBLE, T_LONG
+ else { assert(n_size == 0, "must be T_VOID"); }
+ }
+
+ Node* pop_node(BasicType n_type) {
+ int n_size = type2size[n_type];
+ if (n_size == 1) return pop();
+ else if (n_size == 2) return pop_pair();
+ else return NULL;
+ }
+
+ Node* control() const { return map_not_null()->control(); }
+ Node* i_o() const { return map_not_null()->i_o(); }
+ Node* returnadr() const { return map_not_null()->returnadr(); }
+ Node* frameptr() const { return map_not_null()->frameptr(); }
+ Node* local(uint idx) const { map_not_null(); return _map->local( _map->_jvms, idx); }
+ Node* stack(uint idx) const { map_not_null(); return _map->stack( _map->_jvms, idx); }
+ Node* argument(uint idx) const { map_not_null(); return _map->argument( _map->_jvms, idx); }
+ Node* monitor_box(uint idx) const { map_not_null(); return _map->monitor_box(_map->_jvms, idx); }
+ Node* monitor_obj(uint idx) const { map_not_null(); return _map->monitor_obj(_map->_jvms, idx); }
+
+ void set_control (Node* c) { map_not_null()->set_control(c); }
+ void set_i_o (Node* c) { map_not_null()->set_i_o(c); }
+ void set_local(uint idx, Node* c) { map_not_null(); _map->set_local( _map->_jvms, idx, c); }
+ void set_stack(uint idx, Node* c) { map_not_null(); _map->set_stack( _map->_jvms, idx, c); }
+ void set_argument(uint idx, Node* c){ map_not_null(); _map->set_argument(_map->_jvms, idx, c); }
+ void ensure_stack(uint stk_size) { map_not_null(); _map->ensure_stack(_map->_jvms, stk_size); }
+
+ // Access unaliased memory
+ Node* memory(uint alias_idx);
+ Node* memory(const TypePtr *tp) { return memory(C->get_alias_index(tp)); }
+ Node* memory(Node* adr) { return memory(_gvn.type(adr)->is_ptr()); }
+
+ // Access immutable memory
+ Node* immutable_memory() { return C->immutable_memory(); }
+
+ // Set unaliased memory
+ void set_memory(Node* c, uint alias_idx) { merged_memory()->set_memory_at(alias_idx, c); }
+ void set_memory(Node* c, const TypePtr *tp) { set_memory(c,C->get_alias_index(tp)); }
+ void set_memory(Node* c, Node* adr) { set_memory(c,_gvn.type(adr)->is_ptr()); }
+
+ // Get the entire memory state (probably a MergeMemNode), and reset it
+ // (The resetting prevents somebody from using the dangling Node pointer.)
+ Node* reset_memory();
+
+ // Get the entire memory state, asserted to be a MergeMemNode.
+ MergeMemNode* merged_memory() {
+ Node* mem = map_not_null()->memory();
+ assert(mem->is_MergeMem(), "parse memory is always pre-split");
+ return mem->as_MergeMem();
+ }
+
+ // Set the entire memory state; produce a new MergeMemNode.
+ void set_all_memory(Node* newmem);
+
+ // Create a memory projection from the call, then set_all_memory.
+ void set_all_memory_call(Node* call);
+
+ // Create a LoadNode, reading from the parser's memory state.
+ // (Note: require_atomic_access is useful only with T_LONG.)
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt,
+ bool require_atomic_access = false) {
+ // This version computes alias_index from bottom_type
+ return make_load(ctl, adr, t, bt, adr->bottom_type()->is_ptr(),
+ require_atomic_access);
+ }
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, const TypePtr* adr_type, bool require_atomic_access = false) {
+ // This version computes alias_index from an address type
+ assert(adr_type != NULL, "use other make_load factory");
+ return make_load(ctl, adr, t, bt, C->get_alias_index(adr_type),
+ require_atomic_access);
+ }
+ // This is the base version which is given an alias index.
+ Node* make_load(Node* ctl, Node* adr, const Type* t, BasicType bt, int adr_idx, bool require_atomic_access = false);
+
+ // Create & transform a StoreNode and store the effect into the
+ // parser's memory state.
+ Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
+ const TypePtr* adr_type,
+ bool require_atomic_access = false) {
+ // This version computes alias_index from an address type
+ assert(adr_type != NULL, "use other store_to_memory factory");
+ return store_to_memory(ctl, adr, val, bt,
+ C->get_alias_index(adr_type),
+ require_atomic_access);
+ }
+ // This is the base version which is given alias index
+ // Return the new StoreXNode
+ Node* store_to_memory(Node* ctl, Node* adr, Node* val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
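+  // Example (sketch adapted from the lock-counter update in graphKit.cpp):
+  //   int   adr_idx = Compile::AliasIdxRaw;
+  //   Node* cnt  = make_load(NULL, counter_addr, TypeInt::INT, T_INT, adr_idx);
+  //   Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+  //   store_to_memory(control(), counter_addr, incr, T_INT, adr_idx);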
+
+
+ // All in one pre-barrier, store, post_barrier
+ // Insert a write-barrier'd store. This is to let generational GC
+ // work; we have to flag all oop-stores before the next GC point.
+ //
+ // It comes in 3 flavors of store to an object, array, or unknown.
+ // We use precise card marks for arrays to avoid scanning the entire
+ // array. We use imprecise for objects. We use precise for unknown
+ // since we don't know if we have an array or an object or even
+ // where the object starts.
+ //
+ // If val==NULL, it is taken to be a completely unknown value. QQQ
+
+ Node* store_oop_to_object(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ Node* store_oop_to_array(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ // Could be an array or object we don't know at compile time (unsafe ref.)
+ Node* store_oop_to_unknown(Node* ctl,
+ Node* obj, // containing obj
+ Node* adr, // actual address to store val at
+ const TypePtr* adr_type,
+ Node* val,
+ const Type* val_type,
+ BasicType bt);
+
+ // For the few cases where the barriers need special help
+ void pre_barrier(Node* ctl, Node* obj, Node* adr, uint adr_idx,
+ Node* val, const Type* val_type, BasicType bt);
+
+ void post_barrier(Node* ctl, Node* store, Node* obj, Node* adr, uint adr_idx,
+ Node* val, BasicType bt, bool use_precise);
+
+ // Return addressing for an array element.
+ Node* array_element_address(Node* ary, Node* idx, BasicType elembt,
+ // Optional constraint on the array size:
+ const TypeInt* sizetype = NULL);
+
+ // Return a load of array element at idx.
+ Node* load_array_element(Node* ctl, Node* ary, Node* idx, const TypeAryPtr* arytype);
+
+ // CMS card-marks have an input from the corresponding oop_store
+ void cms_card_mark(Node* ctl, Node* adr, Node* val, Node* oop_store);
+
+ //---------------- Dtrace support --------------------
+ void make_dtrace_method_entry_exit(ciMethod* method, bool is_entry);
+ void make_dtrace_method_entry(ciMethod* method) {
+ make_dtrace_method_entry_exit(method, true);
+ }
+ void make_dtrace_method_exit(ciMethod* method) {
+ make_dtrace_method_entry_exit(method, false);
+ }
+
+ //--------------- stub generation -------------------
+ public:
+ void gen_stub(address C_function,
+ const char *name,
+ int is_fancy_jump,
+ bool pass_tls,
+ bool return_pc);
+
+ //---------- help for generating calls --------------
+
+ // Do a null check on the receiver, which is in argument(0).
+ Node* null_check_receiver(ciMethod* callee) {
+ assert(!callee->is_static(), "must be a virtual method");
+ int nargs = 1 + callee->signature()->size();
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ Node* receiver = argument(0);
+ _sp += nargs;
+ receiver = do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ return receiver;
+ }
+
+ // Fill in argument edges for the call from argument(0), argument(1), ...
+ // (The next step is to call set_edges_for_java_call.)
+ void set_arguments_for_java_call(CallJavaNode* call);
+
+ // Fill in non-argument edges for the call.
+ // Transform the call, and update the basics: control, i_o, memory.
+ // (The next step is usually to call set_results_for_java_call.)
+ void set_edges_for_java_call(CallJavaNode* call,
+ bool must_throw = false);
+
+ // Finish up a java call that was started by set_edges_for_java_call.
+ // Call add_exception on any throw arising from the call.
+ // Return the call result (transformed).
+ Node* set_results_for_java_call(CallJavaNode* call);
+
+ // Similar to set_edges_for_java_call, but simplified for runtime calls.
+ void set_predefined_output_for_runtime_call(Node* call) {
+ set_predefined_output_for_runtime_call(call, NULL, NULL);
+ }
+ void set_predefined_output_for_runtime_call(Node* call,
+ Node* keep_mem,
+ const TypePtr* hook_mem);
+ Node* set_predefined_input_for_runtime_call(SafePointNode* call);
+
+ // helper functions for statistics
+ void increment_counter(address counter_addr); // increment a debug counter
+ void increment_counter(Node* counter_addr); // increment a debug counter
+
+ // Bail out to the interpreter right now
+ // The optional klass is the one causing the trap.
+ // The optional reason is debug information written to the compile log.
+ // Optional must_throw is the same as with add_safepoint_edges.
+ void uncommon_trap(int trap_request,
+ ciKlass* klass = NULL, const char* reason_string = NULL,
+ bool must_throw = false, bool keep_exact_action = false);
+
+ // Shorthand, to avoid saying "Deoptimization::" so many times.
+ void uncommon_trap(Deoptimization::DeoptReason reason,
+ Deoptimization::DeoptAction action,
+ ciKlass* klass = NULL, const char* reason_string = NULL,
+ bool must_throw = false, bool keep_exact_action = false) {
+ uncommon_trap(Deoptimization::make_trap_request(reason, action),
+ klass, reason_string, must_throw, keep_exact_action);
+ }
+
+ // Report if there were too many traps at the current method and bci.
+ // Report if a trap was recorded, and/or PerMethodTrapLimit was exceeded.
+ // If there is no MDO at all, report no trap unless told to assume it.
+ bool too_many_traps(Deoptimization::DeoptReason reason) {
+ return C->too_many_traps(method(), bci(), reason);
+ }
+
+ // Report if there were too many recompiles at the current method and bci.
+ bool too_many_recompiles(Deoptimization::DeoptReason reason) {
+ return C->too_many_recompiles(method(), bci(), reason);
+ }
+
+ // vanilla/CMS post barrier
+ void write_barrier_post(Node *store, Node* obj, Node* adr, Node* val, bool use_precise);
+
+ // Returns the object (if any) which was created the moment before.
+ Node* just_allocated_object(Node* current_control);
+
+ static bool use_ReduceInitialCardMarks() {
+ return (ReduceInitialCardMarks
+ && Universe::heap()->can_elide_tlab_store_barriers());
+ }
+
+ // Helper function to round double arguments before a call
+ void round_double_arguments(ciMethod* dest_method);
+ void round_double_result(ciMethod* dest_method);
+
+ // rounding for strict float precision conformance
+ Node* precision_rounding(Node* n);
+
+ // rounding for strict double precision conformance
+ Node* dprecision_rounding(Node* n);
+
+ // rounding for non-strict double stores
+ Node* dstore_rounding(Node* n);
+
+ // Helper functions for fast/slow path codes
+ Node* opt_iff(Node* region, Node* iff);
+ Node* make_runtime_call(int flags,
+ const TypeFunc* call_type, address call_addr,
+ const char* call_name,
+ const TypePtr* adr_type, // NULL if no memory effects
+ Node* parm0 = NULL, Node* parm1 = NULL,
+ Node* parm2 = NULL, Node* parm3 = NULL,
+ Node* parm4 = NULL, Node* parm5 = NULL,
+ Node* parm6 = NULL, Node* parm7 = NULL);
+ enum { // flag values for make_runtime_call
+ RC_NO_FP = 1, // CallLeafNoFPNode
+ RC_NO_IO = 2, // do not hook IO edges
+ RC_NO_LEAF = 4, // CallStaticJavaNode
+ RC_MUST_THROW = 8, // flag passed to add_safepoint_edges
+ RC_NARROW_MEM = 16, // input memory is same as output
+ RC_UNCOMMON = 32, // freq. expected to be like uncommon trap
+ RC_LEAF = 0 // null value: no flags set
+ };
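+  // Example (illustrative sketch only; the TypeFunc, entry address and
+  // parameters below are hypothetical and must describe a real runtime stub):
+  //   make_runtime_call(RC_LEAF | RC_NO_IO,
+  //                     some_stub_Type(), some_stub_entry, "some_stub",
+  //                     NULL,        // NULL adr_type: no memory effects
+  //                     parm0, parm1);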
+
+ // merge in all memory slices from new_mem, along the given path
+ void merge_memory(Node* new_mem, Node* region, int new_path);
+ void make_slow_call_ex(Node* call, ciInstanceKlass* ex_klass, bool separate_io_proj);
+
+ // Helper functions to build synchronizations
+ int next_monitor();
+ Node* insert_mem_bar(int opcode, Node* precedent = NULL);
+ Node* insert_mem_bar_volatile(int opcode, int alias_idx, Node* precedent = NULL);
+ // Optional 'precedent' is appended as an extra edge, to force ordering.
+ FastLockNode* shared_lock(Node* obj);
+ void shared_unlock(Node* box, Node* obj);
+
+ // helper functions for the fast path/slow path idioms
+ Node* fast_and_slow(Node* in, const Type *result_type, Node* null_result,
+                     IfNode* fast_test, Node* fast_result,
+                     address slow_call, const TypeFunc *slow_call_type,
+                     Node* slow_arg, klassOop ex_klass, Node* slow_result);
+
+ // Generate an instance-of idiom. Used by both the instance-of bytecode
+ // and the reflective instance-of call.
+ Node* gen_instanceof( Node *subobj, Node* superkls );
+
+ // Generate a check-cast idiom. Used by both the check-cast bytecode
+ // and the array-store bytecode
+ Node* gen_checkcast( Node *subobj, Node* superkls,
+ Node* *failure_control = NULL );
+
+ // Generate a subtyping check. Takes as input the subtype and supertype.
+ // Returns 2 values: sets the default control() to the true path and
+ // returns the false path. Only reads from constant memory taken from the
+ // default memory; does not write anything. It also doesn't take in an
+ // Object; if you wish to check an Object you need to load the Object's
+ // class prior to coming here.
+ Node* gen_subtype_check(Node* subklass, Node* superklass);
+
+ // Static parse-time type checking logic for gen_subtype_check:
+ enum { SSC_always_false, SSC_always_true, SSC_easy_test, SSC_full_test };
+ int static_subtype_check(ciKlass* superk, ciKlass* subk);
+
+ // Exact type check used for predicted calls and casts.
+ // Rewrites (*casted_receiver) to be casted to the stronger type.
+ // (Caller is responsible for doing replace_in_map.)
+ Node* type_check_receiver(Node* receiver, ciKlass* klass, float prob,
+ Node* *casted_receiver);
+
+ // implementation of object creation
+ Node* set_output_for_allocation(AllocateNode* alloc,
+ const TypeOopPtr* oop_type,
+ bool raw_mem_only);
+ Node* get_layout_helper(Node* klass_node, jint& constant_value);
+ Node* new_instance(Node* klass_node,
+ Node* slow_test = NULL,
+ bool raw_mem_only = false,
+ Node* *return_size_val = NULL);
+ Node* new_array(Node* klass_node, Node* count_val,
+ bool raw_mem_only = false, Node* *return_size_val = NULL);
+
+ // Handy for making control flow
+ IfNode* create_and_map_if(Node* ctrl, Node* tst, float prob, float cnt) {
+ IfNode* iff = new (C, 2) IfNode(ctrl, tst, prob, cnt);// New IfNode's
+ _gvn.set_type(iff, iff->Value(&_gvn)); // Value may be known at parse-time
+ // Place 'if' on worklist if it will be in graph
+ if (!tst->is_Con()) record_for_igvn(iff); // Range-check and Null-check removal is later
+ return iff;
+ }
+
+ IfNode* create_and_xform_if(Node* ctrl, Node* tst, float prob, float cnt) {
+ IfNode* iff = new (C, 2) IfNode(ctrl, tst, prob, cnt);// New IfNode's
+ _gvn.transform(iff); // Value may be known at parse-time
+ // Place 'if' on worklist if it will be in graph
+ if (!tst->is_Con()) record_for_igvn(iff); // Range-check and Null-check removal is later
+ return iff;
+ }
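+  // Example (sketch; 'a' and 'b' are hypothetical int values, and the caller
+  // still creates whatever projections of the IfNode it needs):
+  //   Node* cmp = _gvn.transform( new (C, 3) CmpINode(a, b) );
+  //   Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::eq) );
+  //   IfNode* iff = create_and_xform_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);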
+};
+
+// Helper class to support building of control flow branches. Upon
+// creation the map and sp at bci are cloned and restored upon de-
+// struction. Typical use:
+//
+// { PreserveJVMState pjvms(this);
+// // code of new branch
+// }
+// // here the JVM state at bci is established
+
+class PreserveJVMState: public StackObj {
+ protected:
+ GraphKit* _kit;
+#ifdef ASSERT
+ int _block; // PO of current block, if a Parse
+ int _bci;
+#endif
+ SafePointNode* _map;
+ uint _sp;
+
+ public:
+ PreserveJVMState(GraphKit* kit, bool clone_map = true);
+ ~PreserveJVMState();
+};
+
+// Helper class to build cutouts of the form if (p) ; else {x...}.
+// The code {x...} must not fall through.
+// The kit's main flow of control is set to the "then" continuation of if(p).
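+// Typical use (as in GraphKit::new_array in graphKit.cpp):
+//
+//   { BuildCutout unless(this, bol_lh, PROB_MAX);
+//     uncommon_trap(Deoptimization::Reason_class_check,
+//                   Deoptimization::Action_maybe_recompile);
+//   }
+//   // here the "then" path of bol_lh continues as the main flow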
+class BuildCutout: public PreserveJVMState {
+ public:
+ BuildCutout(GraphKit* kit, Node* p, float prob, float cnt = COUNT_UNKNOWN);
+ ~BuildCutout();
+};
diff --git a/src/share/vm/opto/idealGraphPrinter.cpp b/src/share/vm/opto/idealGraphPrinter.cpp
new file mode 100644
index 000000000..e65cc72b4
--- /dev/null
+++ b/src/share/vm/opto/idealGraphPrinter.cpp
@@ -0,0 +1,1919 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_idealGraphPrinter.cpp.incl"
+
+#ifndef PRODUCT
+
+// Constants
+// Keep consistent with Java constants
+const char *IdealGraphPrinter::INDENT = " ";
+const char *IdealGraphPrinter::TOP_ELEMENT = "graphDocument";
+const char *IdealGraphPrinter::GROUP_ELEMENT = "group";
+const char *IdealGraphPrinter::GRAPH_ELEMENT = "graph";
+const char *IdealGraphPrinter::PROPERTIES_ELEMENT = "properties";
+const char *IdealGraphPrinter::EDGES_ELEMENT = "edges";
+const char *IdealGraphPrinter::PROPERTY_ELEMENT = "p";
+const char *IdealGraphPrinter::EDGE_ELEMENT = "edge";
+const char *IdealGraphPrinter::NODE_ELEMENT = "node";
+const char *IdealGraphPrinter::NODES_ELEMENT = "nodes";
+const char *IdealGraphPrinter::REMOVE_EDGE_ELEMENT = "removeEdge";
+const char *IdealGraphPrinter::REMOVE_NODE_ELEMENT = "removeNode";
+const char *IdealGraphPrinter::METHOD_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::METHOD_IS_PUBLIC_PROPERTY = "public";
+const char *IdealGraphPrinter::METHOD_IS_STATIC_PROPERTY = "static";
+const char *IdealGraphPrinter::TRUE_VALUE = "true";
+const char *IdealGraphPrinter::NODE_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::EDGE_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::NODE_ID_PROPERTY = "id";
+const char *IdealGraphPrinter::FROM_PROPERTY = "from";
+const char *IdealGraphPrinter::TO_PROPERTY = "to";
+const char *IdealGraphPrinter::PROPERTY_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::GRAPH_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::INDEX_PROPERTY = "index";
+const char *IdealGraphPrinter::METHOD_ELEMENT = "method";
+const char *IdealGraphPrinter::INLINE_ELEMENT = "inline";
+const char *IdealGraphPrinter::BYTECODES_ELEMENT = "bytecodes";
+const char *IdealGraphPrinter::METHOD_BCI_PROPERTY = "bci";
+const char *IdealGraphPrinter::METHOD_SHORT_NAME_PROPERTY = "shortName";
+const char *IdealGraphPrinter::CONTROL_FLOW_ELEMENT = "controlFlow";
+const char *IdealGraphPrinter::BLOCK_NAME_PROPERTY = "name";
+const char *IdealGraphPrinter::BLOCK_DOMINATOR_PROPERTY = "dom";
+const char *IdealGraphPrinter::BLOCK_ELEMENT = "block";
+const char *IdealGraphPrinter::SUCCESSORS_ELEMENT = "successors";
+const char *IdealGraphPrinter::SUCCESSOR_ELEMENT = "successor";
+const char *IdealGraphPrinter::ASSEMBLY_ELEMENT = "assembly";
+
+int IdealGraphPrinter::_file_count = 0;
+
+IdealGraphPrinter *IdealGraphPrinter::printer() {
+ if (PrintIdealGraphLevel == 0) return NULL;
+
+ JavaThread *thread = JavaThread::current();
+ if (!thread->is_Compiler_thread()) return NULL;
+
+ CompilerThread *compiler_thread = (CompilerThread *)thread;
+ if (compiler_thread->ideal_graph_printer() == NULL) {
+ IdealGraphPrinter *printer = new IdealGraphPrinter();
+ compiler_thread->set_ideal_graph_printer(printer);
+ }
+
+ return compiler_thread->ideal_graph_printer();
+}
+
+void IdealGraphPrinter::clean_up() {
+ JavaThread *p;
+ for (p = Threads::first(); p; p = p->next()) {
+ if (p->is_Compiler_thread()) {
+ CompilerThread *c = (CompilerThread *)p;
+ IdealGraphPrinter *printer = c->ideal_graph_printer();
+ if (printer) {
+ delete printer;
+ }
+ c->set_ideal_graph_printer(NULL);
+ }
+ }
+}
+
+// Constructor, either file or network output
+IdealGraphPrinter::IdealGraphPrinter() {
+
+ _traverse_outs = false;
+ _should_send_method = true;
+ _output = NULL;
+ buffer[0] = 0;
+ _depth = 0;
+ _current_method = NULL;
+ assert(!_current_method, "current method must be initialized to NULL");
+ _arena = new Arena();
+
+ _stream = new (ResourceObj::C_HEAP) networkStream();
+
+ if (PrintIdealGraphFile != NULL) {
+ ThreadCritical tc;
+ // User wants all output to go to files
+ if (_file_count != 0) {
+ ResourceMark rm;
+ stringStream st;
+ const char* dot = strrchr(PrintIdealGraphFile, '.');
+ if (dot) {
+ st.write(PrintIdealGraphFile, dot - PrintIdealGraphFile);
+ st.print("%d%s", _file_count, dot);
+ } else {
+ st.print("%s%d", PrintIdealGraphFile, _file_count);
+ }
+ _output = new (ResourceObj::C_HEAP) fileStream(st.as_string());
+ } else {
+ _output = new (ResourceObj::C_HEAP) fileStream(PrintIdealGraphFile);
+ }
+ _file_count++;
+ } else {
+ // Try to connect to visualizer
+ if (_stream->connect(PrintIdealGraphAddress, PrintIdealGraphPort)) {
+ char c = 0;
+ _stream->read(&c, 1);
+ if (c != 'y') {
+ tty->print_cr("Client available, but does not want to receive data!");
+ _stream->close();
+ delete _stream;
+ _stream = NULL;
+ return;
+ }
+ _output = _stream;
+ } else {
+ // It would be nice if we could shut down cleanly but it should
+ // be an error if we can't connect to the visualizer.
+ fatal2("Couldn't connect to visualizer at %s:%d", PrintIdealGraphAddress, PrintIdealGraphPort);
+ }
+ }
+
+ start_element(TOP_ELEMENT);
+}
+
+// Destructor, close file or network stream
+IdealGraphPrinter::~IdealGraphPrinter() {
+
+ end_element(TOP_ELEMENT);
+
+ if (_stream) {
+ delete _stream;
+ if (_stream == _output) {
+ _output = NULL;
+ }
+ _stream = NULL;
+ }
+
+ if (_output) {
+ delete _output;
+ _output = NULL;
+ }
+}
+
+void IdealGraphPrinter::print_ifg(PhaseIFG* ifg) {
+
+ // Code to print an interference graph to tty, currently not used
+
+ /*
+ if (!_current_method) return;
+ // Remove neighbor colors
+
+ for (uint i = 0; i < ifg._maxlrg; i++) {
+
+ IndexSet *s = ifg.neighbors(i);
+ IndexSetIterator elements(s);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ tty->print_cr("Edge between %d and %d\n", i, neighbor);
+ }
+ }
+
+
+ for (uint i = 0; i < ifg._maxlrg; i++) {
+ LRG &l = ifg.lrgs(i);
+ if (l._def) {
+ OptoReg::Name name = l.reg();
+ tty->print("OptoReg::dump: ");
+ OptoReg::dump(name);
+ tty->print_cr("");
+ tty->print_cr("name=%d\n", name);
+ if (name) {
+ if (OptoReg::is_stack(name)) {
+ tty->print_cr("Stack number %d\n", OptoReg::reg2stack(name));
+
+ } else if (!OptoReg::is_valid(name)) {
+ tty->print_cr("BAD!!!");
+ } else {
+
+ if (OptoReg::is_reg(name)) {
+ tty->print_cr(OptoReg::regname(name));
+ } else {
+ int x = 0;
+ }
+ }
+ int x = 0;
+ }
+
+ if (l._def == NodeSentinel) {
+ tty->print("multiple mapping from %d: ", i);
+ for (int j=0; j<l._defs->length(); j++) {
+ tty->print("%d ", l._defs->at(j)->_idx);
+ }
+ tty->print_cr("");
+ } else {
+ tty->print_cr("mapping between %d and %d\n", i, l._def->_idx);
+ }
+ }
+ }*/
+}
+
+void IdealGraphPrinter::print_method(ciMethod *method, int bci, InlineTree *tree) {
+
+ Properties properties;
+ stringStream str;
+ method->print_name(&str);
+
+ stringStream shortStr;
+ method->print_short_name(&shortStr);
+
+
+ properties.add(new Property(METHOD_NAME_PROPERTY, str.as_string()));
+ properties.add(new Property(METHOD_SHORT_NAME_PROPERTY, shortStr.as_string()));
+ properties.add(new Property(METHOD_BCI_PROPERTY, bci));
+ start_element(METHOD_ELEMENT, &properties);
+
+ start_element(BYTECODES_ELEMENT);
+ output()->print_cr("<![CDATA[");
+ method->print_codes_on(output());
+ output()->print_cr("]]>");
+ end_element(BYTECODES_ELEMENT);
+
+ start_element(INLINE_ELEMENT);
+ if (tree != NULL) {
+ GrowableArray<InlineTree *> subtrees = tree->subtrees();
+ for (int i = 0; i < subtrees.length(); i++) {
+ print_inline_tree(subtrees.at(i));
+ }
+ }
+ end_element(INLINE_ELEMENT);
+
+ end_element(METHOD_ELEMENT);
+ output()->flush();
+}
+
+void IdealGraphPrinter::print_inline_tree(InlineTree *tree) {
+
+ if (tree == NULL) return;
+
+ ciMethod *method = tree->method();
+ print_method(tree->method(), tree->caller_bci(), tree);
+
+}
+
+void IdealGraphPrinter::clear_nodes() {
+ // for (int i = 0; i < _nodes.length(); i++) {
+ // _nodes.at(i)->clear_node();
+ // }
+}
+
+void IdealGraphPrinter::print_inlining(Compile* compile) {
+
+ // Print inline tree
+ if (_should_send_method) {
+ InlineTree *inlineTree = compile->ilt();
+ if (inlineTree != NULL) {
+ print_inline_tree(inlineTree);
+ } else {
+ // print this method only
+ }
+ }
+}
+
+// Has to be called whenever a method is compiled
+void IdealGraphPrinter::begin_method(Compile* compile) {
+
+ ciMethod *method = compile->method();
+ assert(_output, "output stream must exist!");
+ assert(method, "null methods are not allowed!");
+ assert(!_current_method, "current method must be null!");
+
+ _arena->destruct_contents();
+
+ start_element(GROUP_ELEMENT);
+
+ // Print properties
+ Properties properties;
+
+ // Add method name
+ stringStream strStream;
+ method->print_name(&strStream);
+ properties.add(new Property(METHOD_NAME_PROPERTY, strStream.as_string()));
+
+ if (method->flags().is_public()) {
+ properties.add(new Property(METHOD_IS_PUBLIC_PROPERTY, TRUE_VALUE));
+ }
+
+ if (method->flags().is_static()) {
+ properties.add(new Property(METHOD_IS_STATIC_PROPERTY, TRUE_VALUE));
+ }
+
+ properties.print(this);
+
+ if (_stream) {
+ char answer = 0;
+ _stream->flush();
+ int result = _stream->read(&answer, 1);
+ _should_send_method = (answer == 'y');
+ }
+
+ this->_nodes = GrowableArray<NodeDescription *>(_arena, 2, 0, NULL);
+ this->_edges = GrowableArray< EdgeDescription * >(_arena, 2, 0, NULL);
+
+
+ this->_current_method = method;
+
+
+
+ _output->flush();
+}
+
+// Has to be called whenever a method has finished compilation
+void IdealGraphPrinter::end_method() {
+
+// if (finish && !in_method) return;
+
+ nmethod* method = (nmethod*)this->_current_method->code();
+
+ start_element(ASSEMBLY_ELEMENT);
+ // Disassembler::decode(method, _output);
+ end_element(ASSEMBLY_ELEMENT);
+
+
+ end_element(GROUP_ELEMENT);
+ _current_method = NULL;
+ _output->flush();
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ delete desc;
+ _nodes.at_put(i, NULL);
+ }
+ }
+ this->_nodes.clear();
+
+
+ for (int i = 0; i < _edges.length(); i++) {
+ // for (int j=0; j<_edges.at(i)->length(); j++) {
+ EdgeDescription *conn = _edges.at(i);
+ conn->print(this);
+ if (conn) {
+ delete conn;
+ _edges.at_put(i, NULL);
+ }
+ //}
+ //_edges.at(i)->clear();
+ //delete _edges.at(i);
+ //_edges.at_put(i, NULL);
+ }
+ this->_edges.clear();
+
+// in_method = false;
+}
+
+// Outputs an XML start element
+void IdealGraphPrinter::start_element(const char *s, Properties *properties /* = NULL */, bool print_indent /* = false */, bool print_return /* = true */) {
+
+ start_element_helper(s, properties, false, print_indent, print_return);
+ _depth++;
+
+}
+
+// Outputs an XML start element without body
+void IdealGraphPrinter::simple_element(const char *s, Properties *properties /* = NULL */, bool print_indent /* = false */) {
+ start_element_helper(s, properties, true, print_indent, true);
+}
+
+// Outputs an XML start element. If outputEnd is true, the element has no body.
+void IdealGraphPrinter::start_element_helper(const char *s, Properties *properties, bool outputEnd, bool print_indent /* = false */, bool print_return /* = true */) {
+
+ assert(_output, "output stream must exist!");
+
+ if (print_indent) this->print_indent();
+ _output->print("<");
+ _output->print(s);
+ if (properties) properties->print_as_attributes(this);
+
+ if (outputEnd) {
+ _output->print("/");
+ }
+
+ _output->print(">");
+ if (print_return) _output->print_cr("");
+
+}
+
+// Print indent
+void IdealGraphPrinter::print_indent() {
+ for (int i = 0; i < _depth; i++) {
+ _output->print(INDENT);
+ }
+}
+
+// Outputs an XML end element
+void IdealGraphPrinter::end_element(const char *s, bool print_indent /* = true */, bool print_return /* = true */) {
+
+ assert(_output, "output stream must exist!");
+
+ _depth--;
+
+ if (print_indent) this->print_indent();
+ _output->print("</");
+ _output->print(s);
+ _output->print(">");
+ if (print_return) _output->print_cr("");
+
+}
+
+bool IdealGraphPrinter::traverse_outs() {
+ return _traverse_outs;
+}
+
+void IdealGraphPrinter::set_traverse_outs(bool b) {
+ _traverse_outs = b;
+}
+
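+// Iterative depth-first traversal starting at the given node: every node
+// reachable through inputs (and also through outputs when _traverse_outs is
+// set) is visited exactly once and handed to pre_node().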
+void IdealGraphPrinter::walk(Node *start) {
+
+
+ VectorSet visited(Thread::current()->resource_area());
+ GrowableArray<Node *> nodeStack(Thread::current()->resource_area(), 0, 0, NULL);
+ nodeStack.push(start);
+ visited.test_set(start->_idx);
+ while(nodeStack.length() > 0) {
+
+ Node *n = nodeStack.pop();
+ IdealGraphPrinter::pre_node(n, this);
+
+ if (_traverse_outs) {
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* p = n->out(i);
+ if (!visited.test_set(p->_idx)) {
+ nodeStack.push(p);
+ }
+ }
+ }
+
+ for ( uint i = 0; i < n->len(); i++ ) {
+ if ( n->in(i) ) {
+ if (!visited.test_set(n->in(i)->_idx)) {
+ nodeStack.push(n->in(i));
+ }
+ }
+ }
+ }
+}
+
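+// compress(), eval() and link() below are the LINK/EVAL forest helpers of
+// the Lengauer-Tarjan dominator algorithm in its simple form (path
+// compression along the ancestor chain, no balancing); they are driven by
+// build_dominators() below.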
+void IdealGraphPrinter::compress(int index, GrowableArray<Block>* blocks) {
+ Block *block = blocks->adr_at(index);
+
+ int ancestor = block->ancestor();
+ assert(ancestor != -1, "");
+
+ Block *ancestor_block = blocks->adr_at(ancestor);
+ if (ancestor_block->ancestor() != -1) {
+ compress(ancestor, blocks);
+
+ int label = block->label();
+ Block *label_block = blocks->adr_at(label);
+
+ int ancestor_label = ancestor_block->label();
+ Block *ancestor_label_block = blocks->adr_at(ancestor_label);
+ if (ancestor_label_block->semi() < label_block->semi()) {
+ block->set_label(ancestor_label);
+ }
+
+ block->set_ancestor(ancestor_block->ancestor());
+ }
+}
+
+int IdealGraphPrinter::eval(int index, GrowableArray<Block>* blocks) {
+ Block *block = blocks->adr_at(index);
+ if (block->ancestor() == -1) {
+ return index;
+ } else {
+ compress(index, blocks);
+ return block->label();
+ }
+}
+
+void IdealGraphPrinter::link(int index1, int index2, GrowableArray<Block>* blocks) {
+ Block *block2 = blocks->adr_at(index2);
+ block2->set_ancestor(index1);
+}
+
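+// Computes immediate dominators for the blocks discovered by build_blocks():
+// a DFS pass numbers the blocks and records parents and predecessors, a
+// reverse pass computes semi-dominators with eval()/link() and the per-block
+// buckets, and a final forward pass resolves relative dominators (unreached
+// blocks default to block 0).
+//
+// Illustrative example (not taken from any particular compile): for the
+// diamond CFG
+//   B0 -> B1, B0 -> B2, B1 -> B3, B2 -> B3
+// the result is dominator(B1) = dominator(B2) = dominator(B3) = B0, because
+// neither B1 nor B2 lies on every path from B0 to B3.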
+void IdealGraphPrinter::build_dominators(GrowableArray<Block>* blocks) {
+
+ if (blocks->length() == 0) return;
+
+ GrowableArray<int> stack;
+ stack.append(0);
+
+ GrowableArray<Block *> array;
+
+ assert(blocks->length() > 0, "");
+ blocks->adr_at(0)->set_dominator(0);
+
+ int n = 0;
+ while(!stack.is_empty()) {
+ int index = stack.pop();
+ Block *block = blocks->adr_at(index);
+ block->set_semi(n);
+ array.append(block);
+ n = n + 1;
+ for (int i = 0; i < block->succs()->length(); i++) {
+ int succ_index = block->succs()->at(i);
+ Block *succ = blocks->adr_at(succ_index);
+ if (succ->semi() == -1) {
+ succ->set_parent(index);
+ stack.push(succ_index);
+ }
+ succ->add_pred(index);
+ }
+ }
+
+ for (int i=n-1; i>0; i--) {
+ Block *block = array.at(i);
+ int block_index = block->index();
+ for (int j=0; j<block->pred()->length(); j++) {
+ int pred_index = block->pred()->at(j);
+ int cur_index = eval(pred_index, blocks);
+
+ Block *cur_block = blocks->adr_at(cur_index);
+ if (cur_block->semi() < block->semi()) {
+ block->set_semi(cur_block->semi());
+ }
+ }
+
+ int semi_index = block->semi();
+ Block *semi_block = array.at(semi_index);
+ semi_block->add_to_bucket(block_index);
+
+ link(block->parent(), block_index, blocks);
+ Block *parent_block = blocks->adr_at(block->parent());
+
+ for (int j=0; j<parent_block->bucket()->length(); j++) {
+ int cur_index = parent_block->bucket()->at(j);
+ int new_index = eval(cur_index, blocks);
+ Block *cur_block = blocks->adr_at(cur_index);
+ Block *new_block = blocks->adr_at(new_index);
+ int dom = block->parent();
+
+ if (new_block->semi() < cur_block->semi()) {
+ dom = new_index;
+ }
+
+ cur_block->set_dominator(dom);
+ }
+
+ parent_block->clear_bucket();
+ }
+
+ for (int i=1; i < n; i++) {
+
+ Block *block = array.at(i);
+ int block_index = block->index();
+
+ int semi_index = block->semi();
+ Block *semi_block = array.at(semi_index);
+
+ if (block->dominator() != semi_block->index()) {
+ int new_dom = blocks->adr_at(block->dominator())->dominator();
+ block->set_dominator(new_dom);
+ }
+ }
+
+ for (int i = 0; i < blocks->length(); i++) {
+ if (blocks->adr_at(i)->dominator() == -1) {
+ blocks->adr_at(i)->set_dominator(0);
+ }
+ }
+
+ // Build dominates array
+ for (int i=1; i < blocks->length(); i++) {
+ Block *block = blocks->adr_at(i);
+ int dominator = block->dominator();
+ Block *dom_block = blocks->adr_at(dominator);
+ dom_block->add_dominates(i);
+ dom_block->add_child(i);
+
+ while(dominator != 0) {
+ dominator = dom_block->dominator();
+ dom_block = blocks->adr_at(dominator);
+ dom_block->add_child(i);
+ }
+ }
+}
+
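+// Fills the table so that common_dominator[a][b] is the index of the nearest
+// block dominating both a and b (their lowest common ancestor in the
+// dominator tree), by recursing over the dominates()/children() lists built
+// by build_dominators().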
+void IdealGraphPrinter::build_common_dominator(int **common_dominator, int index, GrowableArray<Block>* blocks) {
+
+ common_dominator[index][index] = index;
+ Block *block = blocks->adr_at(index);
+ for (int i = 0; i < block->dominates()->length(); i++) {
+ Block *dominated = blocks->adr_at(block->dominates()->at(i));
+
+ for (int j=0; j<dominated->children()->length(); j++) {
+ Block *child = blocks->adr_at(dominated->children()->at(j));
+ common_dominator[index][child->index()] = common_dominator[child->index()][index] = index;
+
+ for (int k=0; k<i; k++) {
+ Block *other_dominated = blocks->adr_at(block->dominates()->at(k));
+ common_dominator[child->index()][other_dominated->index()] = common_dominator[other_dominated->index()][child->index()] = index;
+
+ for (int l=0 ; l<other_dominated->children()->length(); l++) {
+ Block *other_child = blocks->adr_at(other_dominated->children()->at(l));
+ common_dominator[child->index()][other_child->index()] = common_dominator[other_child->index()][child->index()] = index;
+ }
+ }
+ }
+
+ build_common_dominator(common_dominator, dominated->index(), blocks);
+ }
+}
+
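+// Places every node that build_blocks() left without a block: Phis go to
+// their region's block and projections to their parent's block; the rest are
+// worklist-scheduled into the common dominator of the blocks of their uses
+// (a use through a Phi counts against the corresponding region predecessor).
+// Nodes that still cannot be placed fall back to a parent's block or block 0.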
+void IdealGraphPrinter::schedule_latest(int **common_dominator, GrowableArray<Block>* blocks) {
+
+ int queue_size = _nodes.length() + 1;
+ NodeDescription **queue = NEW_RESOURCE_ARRAY(NodeDescription *, queue_size);
+ int queue_start = 0;
+ int queue_end = 0;
+ Arena *a = new Arena();
+ VectorSet on_queue(a);
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->init_succs();
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ for (uint j=0; j<desc->node()->len(); j++) {
+ Node *n = desc->node()->in(j);
+ if (n) {
+ NodeDescription *other_desc = _nodes.at(n->_idx);
+ other_desc->add_succ(desc);
+ }
+ }
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->block_index() == -1) {
+
+ // Put Phi into same block as region
+ if (desc->node()->is_Phi() && desc->node()->in(0) && _nodes.at(desc->node()->in(0)->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->in(0)->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+
+ // Put Projections to same block as parent
+ } else if (desc->node()->is_block_proj() && _nodes.at(desc->node()->is_block_proj()->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->is_block_proj()->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } else {
+ queue[queue_end] = desc;
+ queue_end++;
+ on_queue.set(desc->node()->_idx);
+ }
+ }
+ }
+
+
+ int z = 0;
+ while(queue_start != queue_end && z < 10000) {
+
+ NodeDescription *desc = queue[queue_start];
+ queue_start = (queue_start + 1) % queue_size;
+ on_queue >>= desc->node()->_idx;
+
+ Node* node = desc->node();
+
+ int block_index = -1;
+ if (desc->succs()->length() != 0) {
+ for (int i = 0; i < desc->succs()->length(); i++) {
+ NodeDescription *cur_desc = desc->succs()->at(i);
+ if (cur_desc != desc) {
+ if (cur_desc->succs()->length() == 0) {
+
+ // Ignore nodes with 0 successors
+
+ } else if (cur_desc->block_index() == -1) {
+
+ // Let this node schedule first
+ block_index = -1;
+ break;
+
+ } else if (cur_desc->node()->is_Phi()){
+
+ // Special treatment for Phi functions
+ PhiNode *phi = cur_desc->node()->as_Phi();
+ assert(phi->in(0) && phi->in(0)->is_Region(), "Must have region node in first input");
+ RegionNode *region = phi->in(0)->as_Region();
+
+ for (uint j=1; j<phi->len(); j++) {
+ Node *cur_phi_input = phi->in(j);
+ if (cur_phi_input == desc->node() && region->in(j)) {
+ NodeDescription *cur_region_input = _nodes.at(region->in(j)->_idx);
+ if (cur_region_input->block_index() == -1) {
+
+ // Let this node schedule first
+ block_index = -1;
+ break;
+ } else {
+ if (block_index == -1) {
+ block_index = cur_region_input->block_index();
+ } else {
+ block_index = common_dominator[block_index][cur_region_input->block_index()];
+ }
+ }
+ }
+ }
+
+ } else {
+ if (block_index == -1) {
+ block_index = cur_desc->block_index();
+ } else {
+ block_index = common_dominator[block_index][cur_desc->block_index()];
+ }
+ }
+ }
+ }
+ }
+
+ if (block_index == -1) {
+ queue[queue_end] = desc;
+ queue_end = (queue_end + 1) % queue_size;
+ on_queue.set(desc->node()->_idx);
+ z++;
+ } else {
+ assert(desc->block_index() == -1, "");
+ desc->set_block_index(block_index);
+ blocks->adr_at(block_index)->add_node(desc);
+ z = 0;
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->block_index() == -1) {
+
+ //if (desc->node()->is_Proj() || desc->node()->is_Con()) {
+ Node *parent = desc->node()->in(0);
+ uint cur = 1;
+ while(!parent && cur < desc->node()->len()) {
+ parent = desc->node()->in(cur);
+ cur++;
+ }
+
+ if (parent && _nodes.at(parent->_idx)->block_index() != -1) {
+ int index = _nodes.at(parent->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } else {
+ desc->set_block_index(0);
+ blocks->adr_at(0)->add_node(desc);
+ //ShouldNotReachHere();
+ }
+ //}
+ /*
+ if (desc->node()->is_block_proj() && _nodes.at(desc->node()->is_block_proj()->_idx)->block_index() != -1) {
+ int index = _nodes.at(desc->node()->is_block_proj()->_idx)->block_index();
+ desc->set_block_index(index);
+ blocks->adr_at(index)->add_node(desc);
+ } */
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->clear_succs();
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ int block_index = desc->block_index();
+
+ assert(block_index >= 0 && block_index < blocks->length(), "Block index must be in range");
+ assert(blocks->adr_at(block_index)->nodes()->contains(desc), "Node must be child of block");
+ }
+ }
+ a->destruct_contents();
+}
+
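+// Reconstructs a CFG over the printed nodes: walks control projections to
+// form blocks and record their successors, runs build_dominators() and
+// build_common_dominator(), schedules the remaining nodes with
+// schedule_latest(), and finally emits the CONTROL_FLOW_ELEMENT section with
+// one BLOCK_ELEMENT (successors plus member nodes) per discovered block.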
+void IdealGraphPrinter::build_blocks(Node *root) {
+
+ Arena *a = new Arena();
+ Node_Stack stack(a, 100);
+
+ VectorSet visited(a);
+ stack.push(root, 0);
+ GrowableArray<Block> blocks(a, 2, 0, Block(0));
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ if (_nodes.at(i)) _nodes.at(i)->set_block_index(-1);
+ }
+
+
+ // Order nodes such that node index is equal to idx
+ for (int i = 0; i < _nodes.length(); i++) {
+
+ if (_nodes.at(i)) {
+ NodeDescription *node = _nodes.at(i);
+ int index = node->node()->_idx;
+ if (index != i) {
+ _nodes.at_grow(index);
+ NodeDescription *tmp = _nodes.at(index);
+ *(_nodes.adr_at(index)) = node;
+ *(_nodes.adr_at(i)) = tmp;
+ i--;
+ }
+ }
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *node = _nodes.at(i);
+ if (node) {
+ assert(node->node()->_idx == (uint)i, "");
+ }
+ }
+
+ while(stack.is_nonempty()) {
+
+ //Node *n = stack.node();
+ //int index = stack.index();
+ Node *proj = stack.node();//n->in(index);
+ const Node *parent = proj->is_block_proj();
+ if (parent == NULL) {
+ parent = proj;
+ }
+
+ if (!visited.test_set(parent->_idx)) {
+
+ NodeDescription *end_desc = _nodes.at(parent->_idx);
+ int block_index = blocks.length();
+ Block block(block_index);
+ blocks.append(block);
+ Block *b = blocks.adr_at(block_index);
+ b->set_start(end_desc);
+ // assert(end_desc->block_index() == -1, "");
+ end_desc->set_block_index(block_index);
+ b->add_node(end_desc);
+
+ // Skip any control-pinned middle'in stuff
+ Node *p = proj;
+ NodeDescription *start_desc = NULL;
+ do {
+ proj = p; // Update pointer to last Control
+ if (p->in(0) == NULL) {
+ start_desc = end_desc;
+ break;
+ }
+ p = p->in(0); // Move control forward
+ start_desc = _nodes.at(p->_idx);
+ assert(start_desc, "");
+
+ if (start_desc != end_desc && start_desc->block_index() == -1) {
+ assert(start_desc->block_index() == -1, "");
+ assert(block_index < blocks.length(), "");
+ start_desc->set_block_index(block_index);
+ b->add_node(start_desc);
+ }
+ } while( !p->is_block_proj() &&
+ !p->is_block_start() );
+
+ for (uint i = 0; i < start_desc->node()->len(); i++) {
+
+ Node *pred_node = start_desc->node()->in(i);
+
+
+ if (pred_node && pred_node != start_desc->node()) {
+ const Node *cur_parent = pred_node->is_block_proj();
+ if (cur_parent != NULL) {
+ pred_node = (Node *)cur_parent;
+ }
+
+ NodeDescription *pred_node_desc = _nodes.at(pred_node->_idx);
+ if (pred_node_desc->block_index() != -1) {
+ blocks.adr_at(pred_node_desc->block_index())->add_succ(block_index);
+ }
+ }
+ }
+
+ for (DUIterator_Fast dmax, i = end_desc->node()->fast_outs(dmax); i < dmax; i++) {
+ Node* cur_succ = end_desc->node()->fast_out(i);
+ NodeDescription *cur_succ_desc = _nodes.at(cur_succ->_idx);
+
+ DUIterator_Fast dmax2, i2 = cur_succ->fast_outs(dmax2);
+ if (cur_succ->is_block_proj() && i2 < dmax2 && !cur_succ->is_Root()) {
+
+ for (; i2<dmax2; i2++) {
+ Node *cur_succ2 = cur_succ->fast_out(i2);
+ if (cur_succ2) {
+ cur_succ_desc = _nodes.at(cur_succ2->_idx);
+ if (cur_succ_desc == NULL) {
+ // dead node so skip it
+ continue;
+ }
+ if (cur_succ2 != end_desc->node() && cur_succ_desc->block_index() != -1) {
+ b->add_succ(cur_succ_desc->block_index());
+ }
+ }
+ }
+
+ } else {
+
+ if (cur_succ != end_desc->node() && cur_succ_desc && cur_succ_desc->block_index() != -1) {
+ b->add_succ(cur_succ_desc->block_index());
+ }
+ }
+ }
+
+
+ int num_preds = p->len();
+ int bottom = -1;
+ if (p->is_Region() || p->is_Phi()) {
+ bottom = 0;
+ }
+
+ int pushed = 0;
+ for (int i=num_preds - 1; i > bottom; i--) {
+ if (p->in(i) != NULL && p->in(i) != p) {
+ stack.push(p->in(i), 0);
+ pushed++;
+ }
+ }
+
+ if (pushed == 0 && p->is_Root() && !_matcher) {
+ // Special case when backedges to root are not yet built
+ for (int i = 0; i < _nodes.length(); i++) {
+ if (_nodes.at(i) && _nodes.at(i)->node()->is_SafePoint() && _nodes.at(i)->node()->outcnt() == 0) {
+ stack.push(_nodes.at(i)->node(), 0);
+ }
+ }
+ }
+
+ } else {
+ stack.pop();
+ }
+ }
+
+ build_dominators(&blocks);
+
+ int **common_dominator = NEW_RESOURCE_ARRAY(int *, blocks.length());
+ for (int i = 0; i < blocks.length(); i++) {
+ int *cur = NEW_RESOURCE_ARRAY(int, blocks.length());
+ common_dominator[i] = cur;
+
+ for (int j=0; j<blocks.length(); j++) {
+ cur[j] = 0;
+ }
+ }
+
+ for (int i = 0; i < blocks.length(); i++) {
+ blocks.adr_at(i)->add_child(blocks.adr_at(i)->index());
+ }
+ build_common_dominator(common_dominator, 0, &blocks);
+
+ schedule_latest(common_dominator, &blocks);
+
+ start_element(CONTROL_FLOW_ELEMENT);
+
+ for (int i = 0; i < blocks.length(); i++) {
+ Block *block = blocks.adr_at(i);
+
+ Properties props;
+ props.add(new Property(BLOCK_NAME_PROPERTY, i));
+ props.add(new Property(BLOCK_DOMINATOR_PROPERTY, block->dominator()));
+ start_element(BLOCK_ELEMENT, &props);
+
+ if (block->succs()->length() > 0) {
+ start_element(SUCCESSORS_ELEMENT);
+ for (int j=0; j<block->succs()->length(); j++) {
+ int cur_index = block->succs()->at(j);
+ if (cur_index != 0 /* the start block must not have inputs */) {
+ Properties properties;
+ properties.add(new Property(BLOCK_NAME_PROPERTY, cur_index));
+ simple_element(SUCCESSOR_ELEMENT, &properties);
+ }
+ }
+ end_element(SUCCESSORS_ELEMENT);
+ }
+
+ start_element(NODES_ELEMENT);
+
+ for (int j=0; j<block->nodes()->length(); j++) {
+ NodeDescription *n = block->nodes()->at(j);
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, n->id()));
+ simple_element(NODE_ELEMENT, &properties);
+ }
+
+ end_element(NODES_ELEMENT);
+
+ end_element(BLOCK_ELEMENT);
+ }
+
+
+ end_element(CONTROL_FLOW_ELEMENT);
+
+ a->destruct_contents();
+}
+
+void IdealGraphPrinter::print_method(Compile* compile, const char *name, int level, bool clear_nodes) {
+ print(compile, name, (Node *)compile->root(), level, clear_nodes);
+}
+
+// Print current ideal graph
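+// Walks the graph reachable from the given node, diffs the resulting node
+// and edge descriptions against the previous snapshot, and emits a new graph
+// element only if something changed; descriptions that are no longer valid
+// are printed as removals and then dropped from the caches.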
+void IdealGraphPrinter::print(Compile* compile, const char *name, Node *node, int level, bool clear_nodes) {
+
+// if (finish && !in_method) return;
+ if (!_current_method || !_should_send_method || level > PrintIdealGraphLevel) return;
+
+ assert(_current_method, "newMethod has to be called first!");
+
+ _clear_nodes = clear_nodes;
+
+ // Warning, unsafe cast?
+ _chaitin = (PhaseChaitin *)compile->regalloc();
+ _matcher = compile->matcher();
+
+
+ // Update nodes
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->set_state(Invalid);
+ }
+ }
+ Node *n = node;
+ walk(n);
+
+ // Update edges
+ for (int i = 0; i < _edges.length(); i++) {
+ _edges.at(i)->set_state(Invalid);
+ }
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->state() != Invalid) {
+
+ int to = desc->id();
+ uint len = desc->node()->len();
+ for (uint j=0; j<len; j++) {
+ Node *n = desc->node()->in(j);
+
+ if (n) {
+
+
+ intptr_t from = (intptr_t)n;
+
+ // Assert from node is valid
+ /*
+ bool ok = false;
+ for (int k=0; k<_nodes.length(); k++) {
+ NodeDescription *desc = _nodes.at(k);
+ if (desc && desc->id() == from) {
+ assert(desc->state() != Invalid, "");
+ ok = true;
+ }
+ }
+ assert(ok, "");*/
+
+ uint index = j;
+ if (index >= desc->node()->req()) {
+ index = desc->node()->req();
+ }
+
+ print_edge(from, to, index);
+ }
+ }
+ }
+ }
+
+ bool is_different = false;
+
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc && desc->state() != Valid) {
+ is_different = true;
+ break;
+ }
+ }
+
+ if (!is_different) {
+ for (int i = 0; i < _edges.length(); i++) {
+ EdgeDescription *conn = _edges.at(i);
+ if (conn && conn->state() != Valid) {
+ is_different = true;
+ break;
+ }
+ }
+ }
+
+ // No changes -> do not print graph
+ if (!is_different) return;
+
+ Properties properties;
+ properties.add(new Property(GRAPH_NAME_PROPERTY, (const char *)name));
+ start_element(GRAPH_ELEMENT, &properties);
+
+ start_element(NODES_ELEMENT);
+ for (int i = 0; i < _nodes.length(); i++) {
+ NodeDescription *desc = _nodes.at(i);
+ if (desc) {
+ desc->print(this);
+ if (desc->state() == Invalid) {
+ delete desc;
+ _nodes.at_put(i, NULL);
+ } else {
+ desc->set_state(Valid);
+ }
+ }
+ }
+ end_element(NODES_ELEMENT);
+
+ build_blocks(node);
+
+ start_element(EDGES_ELEMENT);
+ for (int i = 0; i < _edges.length(); i++) {
+ EdgeDescription *conn = _edges.at(i);
+
+ // Assert from and to nodes are valid
+ /*
+ if (!conn->state() == Invalid) {
+ bool ok1 = false;
+ bool ok2 = false;
+ for (int j=0; j<_nodes.length(); j++) {
+ NodeDescription *desc = _nodes.at(j);
+ if (desc && desc->id() == conn->from()) {
+ ok1 = true;
+ }
+
+ if (desc && desc->id() == conn->to()) {
+ ok2 = true;
+ }
+ }
+
+ assert(ok1, "from node not found!");
+ assert(ok2, "to node not found!");
+ }*/
+
+ conn->print(this);
+ if (conn->state() == Invalid) {
+ _edges.remove_at(i);
+ delete conn;
+ i--;
+ }
+ }
+
+ end_element(EDGES_ELEMENT);
+
+ end_element(GRAPH_ELEMENT);
+
+ _output->flush();
+}
+
+// Print edge
+void IdealGraphPrinter::print_edge(int from, int to, int index) {
+
+ EdgeDescription *conn = new EdgeDescription(from, to, index);
+ for (int i = 0; i < _edges.length(); i++) {
+ if (_edges.at(i)->equals(conn)) {
+ conn->set_state(Valid);
+ delete _edges.at(i);
+ _edges.at_put(i, conn);
+ return;
+ }
+ }
+
+ _edges.append(conn);
+}
+
+extern const char *NodeClassNames[];
+
+// Create node description
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::create_node_description(Node* node) {
+
+#ifndef PRODUCT
+ node->_in_dump_cnt++;
+ NodeDescription *desc = new NodeDescription(node);
+ desc->properties()->add(new Property(NODE_NAME_PROPERTY, (const char *)node->Name()));
+
+ const Type *t = node->bottom_type();
+ desc->properties()->add(new Property("type", (const char *)Type::msg[t->base()]));
+
+ desc->properties()->add(new Property("idx", node->_idx));
+#ifdef ASSERT
+ desc->properties()->add(new Property("debug_idx", node->_debug_idx));
+#endif
+
+
+ const jushort flags = node->flags();
+ if (flags & Node::Flag_is_Copy) {
+ desc->properties()->add(new Property("is_copy", "true"));
+ }
+ if (flags & Node::Flag_is_Call) {
+ desc->properties()->add(new Property("is_call", "true"));
+ }
+ if (flags & Node::Flag_rematerialize) {
+ desc->properties()->add(new Property("rematerialize", "true"));
+ }
+ if (flags & Node::Flag_needs_anti_dependence_check) {
+ desc->properties()->add(new Property("needs_anti_dependence_check", "true"));
+ }
+ if (flags & Node::Flag_is_macro) {
+ desc->properties()->add(new Property("is_macro", "true"));
+ }
+ if (flags & Node::Flag_is_Con) {
+ desc->properties()->add(new Property("is_con", "true"));
+ }
+ if (flags & Node::Flag_is_cisc_alternate) {
+ desc->properties()->add(new Property("is_cisc_alternate", "true"));
+ }
+ if (flags & Node::Flag_is_Branch) {
+ desc->properties()->add(new Property("is_branch", "true"));
+ }
+ if (flags & Node::Flag_is_block_start) {
+ desc->properties()->add(new Property("is_block_start", "true"));
+ }
+ if (flags & Node::Flag_is_Goto) {
+ desc->properties()->add(new Property("is_goto", "true"));
+ }
+ if (flags & Node::Flag_is_dead_loop_safe) {
+ desc->properties()->add(new Property("is_dead_loop_safe", "true"));
+ }
+ if (flags & Node::Flag_may_be_short_branch) {
+ desc->properties()->add(new Property("may_be_short_branch", "true"));
+ }
+ if (flags & Node::Flag_is_safepoint_node) {
+ desc->properties()->add(new Property("is_safepoint_node", "true"));
+ }
+ if (flags & Node::Flag_is_pc_relative) {
+ desc->properties()->add(new Property("is_pc_relative", "true"));
+ }
+
+ if (_matcher) {
+ if (_matcher->is_shared(desc->node())) {
+ desc->properties()->add(new Property("is_shared", "true"));
+ } else {
+ desc->properties()->add(new Property("is_shared", "false"));
+ }
+
+ if (_matcher->is_dontcare(desc->node())) {
+ desc->properties()->add(new Property("is_dontcare", "true"));
+ } else {
+ desc->properties()->add(new Property("is_dontcare", "false"));
+ }
+ }
+
+ if (node->is_Proj()) {
+ desc->properties()->add(new Property("con", (int)node->as_Proj()->_con));
+ }
+
+ if (node->is_Mach()) {
+ desc->properties()->add(new Property("idealOpcode", (const char *)NodeClassNames[node->as_Mach()->ideal_Opcode()]));
+ }
+
+
+
+
+
+ outputStream *oldTty = tty;
+ buffer[0] = 0;
+ stringStream s2(buffer, sizeof(buffer) - 1);
+
+ node->dump_spec(&s2);
+ assert(s2.size() < sizeof(buffer), "size in range");
+ desc->properties()->add(new Property("dump_spec", buffer));
+
+ if (node->is_block_proj()) {
+ desc->properties()->add(new Property("is_block_proj", "true"));
+ }
+
+ if (node->is_block_start()) {
+ desc->properties()->add(new Property("is_block_start", "true"));
+ }
+
+ const char *short_name = "short_name";
+ if (strcmp(node->Name(), "Parm") == 0 && node->as_Proj()->_con >= TypeFunc::Parms) {
+ int index = node->as_Proj()->_con - TypeFunc::Parms;
+ if (index >= 10) {
+ desc->properties()->add(new Property(short_name, "PA"));
+ } else {
+ sprintf(buffer, "P%d", index);
+ desc->properties()->add(new Property(short_name, buffer));
+ }
+ } else if (strcmp(node->Name(), "IfTrue") == 0) {
+ desc->properties()->add(new Property(short_name, "T"));
+ } else if (strcmp(node->Name(), "IfFalse") == 0) {
+ desc->properties()->add(new Property(short_name, "F"));
+ } else if ((node->is_Con() && node->is_Type()) || node->is_Proj()) {
+
+ if (t->base() == Type::Int && t->is_int()->is_con()) {
+ const TypeInt *typeInt = t->is_int();
+ assert(typeInt->is_con(), "must be constant");
+ jint value = typeInt->get_con();
+
+ // max. 2 chars allowed
+ if (value >= -9 && value <= 99) {
+ sprintf(buffer, "%d", value);
+ desc->properties()->add(new Property(short_name, buffer));
+ } else {
+ desc->properties()->add(new Property(short_name, "I"));
+ }
+ } else if (t == Type::TOP) {
+ desc->properties()->add(new Property(short_name, "^"));
+ } else if (t->base() == Type::Long && t->is_long()->is_con()) {
+ const TypeLong *typeLong = t->is_long();
+ assert(typeLong->is_con(), "must be constant");
+ jlong value = typeLong->get_con();
+
+ // max. 2 chars allowed
+ if (value >= -9 && value <= 99) {
+ sprintf(buffer, "%d", (int)value); // value is a jlong but is known to fit in an int here (-9..99)
+ desc->properties()->add(new Property(short_name, buffer));
+ } else {
+ desc->properties()->add(new Property(short_name, "L"));
+ }
+ } else if (t->base() == Type::KlassPtr) {
+ const TypeKlassPtr *typeKlass = t->is_klassptr();
+ desc->properties()->add(new Property(short_name, "CP"));
+ } else if (t->base() == Type::Control) {
+ desc->properties()->add(new Property(short_name, "C"));
+ } else if (t->base() == Type::Memory) {
+ desc->properties()->add(new Property(short_name, "M"));
+ } else if (t->base() == Type::Abio) {
+ desc->properties()->add(new Property(short_name, "IO"));
+ } else if (t->base() == Type::Return_Address) {
+ desc->properties()->add(new Property(short_name, "RA"));
+ } else if (t->base() == Type::AnyPtr) {
+ desc->properties()->add(new Property(short_name, "P"));
+ } else if (t->base() == Type::RawPtr) {
+ desc->properties()->add(new Property(short_name, "RP"));
+ } else if (t->base() == Type::AryPtr) {
+ desc->properties()->add(new Property(short_name, "AP"));
+ }
+ }
+
+ if (node->is_SafePoint()) {
+ SafePointNode *safePointNode = node->as_SafePoint();
+ if (safePointNode->jvms()) {
+ stringStream bciStream;
+ bciStream.print("%d ", safePointNode->jvms()->bci());
+ JVMState *caller = safePointNode->jvms()->caller();
+ while(caller) {
+ bciStream.print("%d ", caller->bci());
+
+ caller = caller->caller();
+ }
+ desc->properties()->add(new Property("bci", bciStream.as_string()));
+ }
+ }
+
+ if (_chaitin && _chaitin != (PhaseChaitin *)0xdeadbeef) {
+ buffer[0] = 0;
+ _chaitin->dump_register(node, buffer);
+ desc->properties()->add(new Property("reg", buffer));
+ desc->properties()->add(new Property("lrg", _chaitin->n2lidx(node)));
+ }
+
+
+ node->_in_dump_cnt--;
+ return desc;
+#else
+ return NULL;
+#endif
+}
+
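+// Called by walk() for every node reached. Builds a fresh description and
+// compares it with the one recorded for the same node index during an
+// earlier print: an identical description is marked Valid (unchanged), a
+// matching id with different properties is marked New (changed), and the old
+// entry is released.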
+void IdealGraphPrinter::pre_node(Node* node, void *env) {
+
+ IdealGraphPrinter *printer = (IdealGraphPrinter *)env;
+
+ NodeDescription *newDesc = printer->create_node_description(node);
+
+ if (printer->_clear_nodes) {
+
+ printer->_nodes.append(newDesc);
+ } else {
+
+ NodeDescription *desc = printer->_nodes.at_grow(node->_idx, NULL);
+
+ if (desc && desc->equals(newDesc)) {
+ //desc->set_state(Valid);
+ //desc->set_node(node);
+ delete desc;
+ printer->_nodes.at_put(node->_idx, NULL);
+ newDesc->set_state(Valid);
+ //printer->_nodes.at_put(node->_idx, newDesc);
+ } else {
+
+ if (desc && desc->id() == newDesc->id()) {
+ delete desc;
+ printer->_nodes.at_put(node->_idx, NULL);
+ newDesc->set_state(New);
+
+ }
+
+ //if (desc) {
+ // delete desc;
+ //}
+
+ //printer->_nodes.at_put(node->_idx, newDesc);
+ }
+
+ printer->_nodes.append(newDesc);
+ }
+}
+
+void IdealGraphPrinter::post_node(Node* node, void *env) {
+}
+
+outputStream *IdealGraphPrinter::output() {
+ return _output;
+}
+
+IdealGraphPrinter::Description::Description() {
+ _state = New;
+}
+
+void IdealGraphPrinter::Description::print(IdealGraphPrinter *printer) {
+ if (_state == Invalid) {
+ print_removed(printer);
+ } else if (_state == New) {
+ print_changed(printer);
+ }
+}
+
+void IdealGraphPrinter::Description::set_state(State s) {
+ _state = s;
+}
+
+IdealGraphPrinter::State IdealGraphPrinter::Description::state() {
+ return _state;
+}
+
+void IdealGraphPrinter::Block::set_proj(NodeDescription *n) {
+ _proj = n;
+}
+
+void IdealGraphPrinter::Block::set_start(NodeDescription *n) {
+ _start = n;
+}
+
+int IdealGraphPrinter::Block::semi() {
+ return _semi;
+}
+
+int IdealGraphPrinter::Block::parent() {
+ return _parent;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::bucket() {
+ return &_bucket;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::children() {
+ return &_children;
+}
+
+void IdealGraphPrinter::Block::add_child(int i) {
+ _children.append(i);
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::dominates() {
+ return &_dominates;
+}
+
+void IdealGraphPrinter::Block::add_dominates(int i) {
+ _dominates.append(i);
+}
+
+void IdealGraphPrinter::Block::add_to_bucket(int i) {
+ _bucket.append(i);
+}
+
+void IdealGraphPrinter::Block::clear_bucket() {
+ _bucket.clear();
+}
+
+void IdealGraphPrinter::Block::set_dominator(int i) {
+ _dominator = i;
+}
+
+void IdealGraphPrinter::Block::set_label(int i) {
+ _label = i;
+}
+
+int IdealGraphPrinter::Block::label() {
+ return _label;
+}
+
+int IdealGraphPrinter::Block::ancestor() {
+ return _ancestor;
+}
+
+void IdealGraphPrinter::Block::set_ancestor(int i) {
+ _ancestor = i;
+}
+
+int IdealGraphPrinter::Block::dominator() {
+ return _dominator;
+}
+
+int IdealGraphPrinter::Block::index() {
+ return _index;
+}
+
+void IdealGraphPrinter::Block::set_parent(int i) {
+ _parent = i;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::pred() {
+ return &_pred;
+}
+
+void IdealGraphPrinter::Block::set_semi(int i) {
+ _semi = i;
+}
+
+IdealGraphPrinter::Block::Block() {
+}
+
+IdealGraphPrinter::Block::Block(int index) {
+ _index = index;
+ _label = index;
+ _semi = -1;
+ _ancestor = -1;
+ _dominator = -1;
+}
+
+void IdealGraphPrinter::Block::add_pred(int i) {
+ _pred.append(i);
+}
+
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::Block::proj() {
+ return _proj;
+}
+
+IdealGraphPrinter::NodeDescription *IdealGraphPrinter::Block::start() {
+ return _start;
+}
+
+GrowableArray<int>* IdealGraphPrinter::Block::succs() {
+ return &_succs;
+}
+
+void IdealGraphPrinter::Block::add_succ(int index) {
+
+ if (!_succs.contains(index)) {
+ _succs.append(index);
+ }
+}
+
+
+void IdealGraphPrinter::Block::add_node(NodeDescription *n) {
+ if (!_nodes.contains(n)) {
+ _nodes.append(n);
+ }
+}
+
+GrowableArray<IdealGraphPrinter::NodeDescription *>* IdealGraphPrinter::Block::nodes() {
+ return &_nodes;
+}
+
+int IdealGraphPrinter::NodeDescription::count = 0;
+
+IdealGraphPrinter::NodeDescription::NodeDescription(Node* node) : _node(node) {
+ _id = (intptr_t)(node);
+ _block_index = -1;
+}
+
+IdealGraphPrinter::NodeDescription::~NodeDescription() {
+ _properties.clean();
+}
+
+// void IdealGraphPrinter::NodeDescription::set_node(Node* node) {
+// //this->_node = node;
+// }
+
+int IdealGraphPrinter::NodeDescription::block_index() {
+ return _block_index;
+}
+
+
+GrowableArray<IdealGraphPrinter::NodeDescription *>* IdealGraphPrinter::NodeDescription::succs() {
+ return &_succs;
+}
+
+void IdealGraphPrinter::NodeDescription::clear_succs() {
+ _succs.clear();
+}
+
+void IdealGraphPrinter::NodeDescription::init_succs() {
+ _succs = GrowableArray<NodeDescription *>();
+}
+
+void IdealGraphPrinter::NodeDescription::add_succ(NodeDescription *desc) {
+ _succs.append(desc);
+}
+
+void IdealGraphPrinter::NodeDescription::set_block_index(int i) {
+ _block_index = i;
+}
+
+bool IdealGraphPrinter::NodeDescription::equals(NodeDescription *desc) {
+ if (desc == NULL) return false;
+ if (desc->id() != id()) return false;
+ return properties()->equals(desc->properties());
+}
+
+Node* IdealGraphPrinter::NodeDescription::node() {
+ return _node;
+}
+
+IdealGraphPrinter::Properties* IdealGraphPrinter::NodeDescription::properties() {
+ return &_properties;
+}
+
+uint IdealGraphPrinter::NodeDescription::id() {
+ return _id;
+}
+
+void IdealGraphPrinter::NodeDescription::print_changed(IdealGraphPrinter *printer) {
+
+
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, id()));
+ printer->start_element(NODE_ELEMENT, &properties);
+
+ this->properties()->print(printer);
+
+
+ printer->end_element(NODE_ELEMENT);
+}
+
+void IdealGraphPrinter::NodeDescription::print_removed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(NODE_ID_PROPERTY, id()));
+ printer->simple_element(REMOVE_NODE_ELEMENT, &properties);
+}
+
+IdealGraphPrinter::EdgeDescription::EdgeDescription(int from, int to, int index) {
+ this->_from = from;
+ this->_to = to;
+ this->_index = index;
+}
+
+IdealGraphPrinter::EdgeDescription::~EdgeDescription() {
+}
+
+int IdealGraphPrinter::EdgeDescription::from() {
+ return _from;
+}
+
+int IdealGraphPrinter::EdgeDescription::to() {
+ return _to;
+}
+
+void IdealGraphPrinter::EdgeDescription::print_changed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(INDEX_PROPERTY, _index));
+ properties.add(new Property(FROM_PROPERTY, _from));
+ properties.add(new Property(TO_PROPERTY, _to));
+ printer->simple_element(EDGE_ELEMENT, &properties);
+}
+
+void IdealGraphPrinter::EdgeDescription::print_removed(IdealGraphPrinter *printer) {
+
+ Properties properties;
+ properties.add(new Property(INDEX_PROPERTY, _index));
+ properties.add(new Property(FROM_PROPERTY, _from));
+ properties.add(new Property(TO_PROPERTY, _to));
+ printer->simple_element(REMOVE_EDGE_ELEMENT, &properties);
+}
+
+bool IdealGraphPrinter::EdgeDescription::equals(IdealGraphPrinter::EdgeDescription *desc) {
+ if (desc == NULL) return false;
+ return (_from == desc->_from && _to == desc->_to && _index == desc->_index);
+}
+
+IdealGraphPrinter::Properties::Properties() : list(new (ResourceObj::C_HEAP) GrowableArray<Property *>(2, 0, NULL, true)) {
+}
+
+IdealGraphPrinter::Properties::~Properties() {
+ clean();
+ delete list;
+}
+
+void IdealGraphPrinter::Properties::add(Property *p) {
+ assert(p != NULL, "Property not NULL");
+ list->append(p);
+}
+
+void IdealGraphPrinter::Properties::print(IdealGraphPrinter *printer) {
+ printer->start_element(PROPERTIES_ELEMENT);
+
+ for (int i = 0; i < list->length(); i++) {
+ list->at(i)->print(printer);
+ }
+
+ printer->end_element(PROPERTIES_ELEMENT);
+}
+
+void IdealGraphPrinter::Properties::clean() {
+ for (int i = 0; i < list->length(); i++) {
+ delete list->at(i);
+ list->at_put(i, NULL);
+ }
+ list->clear();
+ assert(list->length() == 0, "List cleared");
+}
+
+void IdealGraphPrinter::Properties::remove(const char *name) {
+ for (int i = 0; i < list->length(); i++) {
+ if (strcmp(list->at(i)->name(), name) == 0) {
+ delete list->at(i);
+ list->remove_at(i);
+ i--;
+ }
+ }
+}
+
+void IdealGraphPrinter::Properties::print_as_attributes(IdealGraphPrinter *printer) {
+
+ for (int i = 0; i < list->length(); i++) {
+ assert(list->at(i) != NULL, "Property not null!");
+ printer->output()->print(" ");
+ list->at(i)->print_as_attribute(printer);
+ }
+}
+
+bool IdealGraphPrinter::Properties::equals(Properties* p) {
+ if (p->list->length() != this->list->length()) return false;
+
+ for (int i = 0; i < list->length(); i++) {
+ assert(list->at(i) != NULL, "Property not null!");
+ if (!list->at(i)->equals(p->list->at(i))) return false;
+ }
+
+ return true;
+}
+
+IdealGraphPrinter::Property::Property() {
+ _name = NULL;
+ _value = NULL;
+}
+
+const char *IdealGraphPrinter::Property::name() {
+ return _name;
+}
+
+IdealGraphPrinter::Property::Property(const Property* p) {
+
+ this->_name = NULL;
+ this->_value = NULL;
+
+ if (p->_name != NULL) {
+ _name = dup(p->_name);
+ }
+
+ if (p->_value) {
+ _value = dup(p->_value);
+ }
+}
+
+IdealGraphPrinter::Property::~Property() {
+
+ clean();
+}
+
+IdealGraphPrinter::Property::Property(const char *name, const char *value) {
+
+ assert(name, "Name must not be null!");
+ assert(value, "Value must not be null!");
+
+ _name = dup(name);
+ _value = dup(value);
+}
+
+IdealGraphPrinter::Property::Property(const char *name, int intValue) {
+ _name = dup(name);
+
+ stringStream stream;
+ stream.print("%d", intValue);
+ _value = dup(stream.as_string());
+}
+
+void IdealGraphPrinter::Property::clean() {
+ if (_name) {
+ delete[] _name; // allocated with new char[] in Property::dup()
+ _name = NULL;
+ }
+
+ if (_value) {
+ delete[] _value; // allocated with new char[] in Property::dup()
+ _value = NULL;
+ }
+}
+
+
+bool IdealGraphPrinter::Property::is_null() {
+ return _name == NULL;
+}
+
+void IdealGraphPrinter::Property::print(IdealGraphPrinter *printer) {
+
+ assert(!is_null(), "null properties cannot be printed!");
+ Properties properties;
+ properties.add(new Property(PROPERTY_NAME_PROPERTY, _name));
+ printer->start_element(PROPERTY_ELEMENT, &properties, false, false);
+ printer->print_xml(_value);
+ printer->end_element(PROPERTY_ELEMENT, false, true);
+}
+
+void IdealGraphPrinter::Property::print_as_attribute(IdealGraphPrinter *printer) {
+
+ printer->output()->print(_name);
+ printer->output()->print("=\"");
+ printer->print_xml(_value);
+ printer->output()->print("\"");
+}
+
+
+bool IdealGraphPrinter::Property::equals(Property* p) {
+
+ if (is_null() && p->is_null()) return true;
+ if (is_null()) return false;
+ if (p->is_null()) return false;
+
+ int cmp1 = strcmp(p->_name, _name);
+ if (cmp1 != 0) return false;
+
+ int cmp2 = strcmp(p->_value, _value);
+ if (cmp2 != 0) return false;
+
+ return true;
+}
+
+void IdealGraphPrinter::print_xml(const char *value) {
+ size_t len = strlen(value);
+
+ char buf[2];
+ buf[1] = 0;
+ for (size_t i = 0; i < len; i++) {
+ char c = value[i];
+
+ switch(c) {
+ case '<':
+ output()->print("&lt;");
+ break;
+
+ case '>':
+ output()->print("&gt;");
+ break;
+
+ // '&' and '"' must be escaped as well, since values are also emitted
+ // inside double-quoted XML attributes (see Property::print_as_attribute).
+ case '&':
+ output()->print("&amp;");
+ break;
+
+ case '"':
+ output()->print("&quot;");
+ break;
+
+ default:
+ buf[0] = c;
+ output()->print(buf);
+ break;
+ }
+ }
+}
+
+#endif
diff --git a/src/share/vm/opto/idealGraphPrinter.hpp b/src/share/vm/opto/idealGraphPrinter.hpp
new file mode 100644
index 000000000..b73493e19
--- /dev/null
+++ b/src/share/vm/opto/idealGraphPrinter.hpp
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#ifndef PRODUCT
+
+class Compile;
+class PhaseIFG;
+class PhaseChaitin;
+class Matcher;
+class Node;
+class InlineTree;
+class ciMethod;
+
+class IdealGraphPrinter
+{
+private:
+
+ enum State
+ {
+ Invalid,
+ Valid,
+ New
+ };
+
+private:
+
+ static const char *INDENT;
+ static const char *TOP_ELEMENT;
+ static const char *GROUP_ELEMENT;
+ static const char *GRAPH_ELEMENT;
+ static const char *PROPERTIES_ELEMENT;
+ static const char *EDGES_ELEMENT;
+ static const char *PROPERTY_ELEMENT;
+ static const char *EDGE_ELEMENT;
+ static const char *NODE_ELEMENT;
+ static const char *NODES_ELEMENT;
+ static const char *CONTROL_FLOW_ELEMENT;
+ static const char *REMOVE_EDGE_ELEMENT;
+ static const char *REMOVE_NODE_ELEMENT;
+ static const char *METHOD_NAME_PROPERTY;
+ static const char *BLOCK_NAME_PROPERTY;
+ static const char *BLOCK_DOMINATOR_PROPERTY;
+ static const char *BLOCK_ELEMENT;
+ static const char *SUCCESSORS_ELEMENT;
+ static const char *SUCCESSOR_ELEMENT;
+ static const char *METHOD_IS_PUBLIC_PROPERTY;
+ static const char *METHOD_IS_STATIC_PROPERTY;
+ static const char *TRUE_VALUE;
+ static const char *NODE_NAME_PROPERTY;
+ static const char *EDGE_NAME_PROPERTY;
+ static const char *NODE_ID_PROPERTY;
+ static const char *FROM_PROPERTY;
+ static const char *TO_PROPERTY;
+ static const char *PROPERTY_NAME_PROPERTY;
+ static const char *GRAPH_NAME_PROPERTY;
+ static const char *INDEX_PROPERTY;
+ static const char *METHOD_ELEMENT;
+ static const char *INLINE_ELEMENT;
+ static const char *BYTECODES_ELEMENT;
+ static const char *METHOD_BCI_PROPERTY;
+ static const char *METHOD_SHORT_NAME_PROPERTY;
+ static const char *ASSEMBLY_ELEMENT;
+
+ class Property {
+
+ private:
+
+ const char *_name;
+ const char *_value;
+
+ public:
+
+ Property();
+ Property(const Property* p);
+ ~Property();
+ Property(const char *name, const char *value);
+ Property(const char *name, int value);
+ bool equals(Property* p);
+ void print(IdealGraphPrinter *printer);
+ void print_as_attribute(IdealGraphPrinter *printer);
+ bool is_null();
+ void clean();
+ const char *name();
+
+ static const char* dup(const char *str) {
+ char * copy = new char[strlen(str)+1];
+ strcpy(copy, str);
+ return copy;
+ }
+
+ };
+
+ class Properties {
+
+ private:
+
+ GrowableArray<Property *> *list;
+
+ public:
+
+ Properties();
+ ~Properties();
+ void add(Property *p);
+ void remove(const char *name);
+ bool equals(Properties* p);
+ void print(IdealGraphPrinter *printer);
+ void print_as_attributes(IdealGraphPrinter *printer);
+ void clean();
+
+ };
+
+
+ class Description {
+
+ private:
+
+ State _state;
+
+ public:
+
+ Description();
+
+ State state();
+ void set_state(State s);
+ void print(IdealGraphPrinter *printer);
+ virtual void print_changed(IdealGraphPrinter *printer) = 0;
+ virtual void print_removed(IdealGraphPrinter *printer) = 0;
+
+ };
+
+ class NodeDescription : public Description{
+
+ public:
+
+ static int count;
+
+ private:
+
+ GrowableArray<NodeDescription *> _succs;
+ int _block_index;
+ uintptr_t _id;
+ Properties _properties;
+ Node* _node;
+
+ public:
+
+ NodeDescription(Node* node);
+ ~NodeDescription();
+ Node* node();
+
+ // void set_node(Node* node);
+ GrowableArray<NodeDescription *>* succs();
+ void init_succs();
+ void clear_succs();
+ void add_succ(NodeDescription *desc);
+ int block_index();
+ void set_block_index(int i);
+ Properties* properties();
+ virtual void print_changed(IdealGraphPrinter *printer);
+ virtual void print_removed(IdealGraphPrinter *printer);
+ bool equals(NodeDescription *desc);
+ uint id();
+
+ };
+
+ class Block {
+
+ private:
+
+ NodeDescription *_start;
+ NodeDescription *_proj;
+ GrowableArray<int> _succs;
+ GrowableArray<NodeDescription *> _nodes;
+ GrowableArray<int> _dominates;
+ GrowableArray<int> _children;
+ int _semi;
+ int _parent;
+ GrowableArray<int> _pred;
+ GrowableArray<int> _bucket;
+ int _index;
+ int _dominator;
+ int _ancestor;
+ int _label;
+
+ public:
+
+ Block();
+ Block(int index);
+
+ void add_node(NodeDescription *n);
+ GrowableArray<NodeDescription *>* nodes();
+ GrowableArray<int>* children();
+ void add_child(int i);
+ void add_succ(int index);
+ GrowableArray<int>* succs();
+ GrowableArray<int>* dominates();
+ void add_dominates(int i);
+ NodeDescription *start();
+ NodeDescription *proj();
+ void set_start(NodeDescription *n);
+ void set_proj(NodeDescription *n);
+
+ int label();
+ void set_label(int i);
+ int ancestor();
+ void set_ancestor(int i);
+ int index();
+ int dominator();
+ void set_dominator(int i);
+ int parent();
+ void set_parent(int i);
+ int semi();
+ GrowableArray<int>* bucket();
+ void add_to_bucket(int i);
+ void clear_bucket();
+ GrowableArray<int>* pred();
+ void set_semi(int i);
+ void add_pred(int i);
+
+ };
+
+ class EdgeDescription : public Description {
+
+ private:
+
+ int _from;
+ int _to;
+ int _index;
+ public:
+
+ EdgeDescription(int from, int to, int index);
+ ~EdgeDescription();
+
+ virtual void print_changed(IdealGraphPrinter *printer);
+ virtual void print_removed(IdealGraphPrinter *printer);
+ bool equals(EdgeDescription *desc);
+ int from();
+ int to();
+ };
+
+
+ static int _file_count;
+ networkStream *_stream;
+ outputStream *_output;
+ ciMethod *_current_method;
+ GrowableArray<NodeDescription *> _nodes;
+ GrowableArray<EdgeDescription *> _edges;
+ int _depth;
+ Arena *_arena;
+ char buffer[128];
+ bool _should_send_method;
+ PhaseChaitin* _chaitin;
+ bool _clear_nodes;
+ Matcher* _matcher;
+ bool _traverse_outs;
+
+ void start_element_helper(const char *name, Properties *properties, bool endElement, bool print_indent = false, bool print_return = true);
+ NodeDescription *create_node_description(Node* node);
+
+ static void pre_node(Node* node, void *env);
+ static void post_node(Node* node, void *env);
+
+ void schedule_latest(int **common_dominator, GrowableArray<Block>* blocks);
+ void build_common_dominator(int **common_dominator, int index, GrowableArray<Block>* blocks);
+ void compress(int index, GrowableArray<Block>* blocks);
+ int eval(int index, GrowableArray<Block>* blocks);
+ void link(int index1, int index2, GrowableArray<Block>* blocks);
+ void build_dominators(GrowableArray<Block>* blocks);
+ void build_blocks(Node *node);
+ void walk(Node *n);
+ void start_element(const char *name, Properties *properties = NULL, bool print_indent = false, bool print_return = true);
+ void simple_element(const char *name, Properties *properties = NULL, bool print_indent = false);
+ void end_element(const char *name, bool print_indent = false, bool print_return = true);
+ void print_edge(int from, int to, int index);
+ void print_indent();
+ void print_method(ciMethod *method, int bci, InlineTree *tree);
+ void print_inline_tree(InlineTree *tree);
+ void clear_nodes();
+
+ IdealGraphPrinter();
+ ~IdealGraphPrinter();
+
+public:
+
+ static void clean_up();
+ static IdealGraphPrinter *printer();
+
+ bool traverse_outs();
+ void set_traverse_outs(bool b);
+ void print_ifg(PhaseIFG* ifg);
+ outputStream *output();
+ void print_inlining(Compile* compile);
+ void begin_method(Compile* compile);
+ void end_method();
+ void print_method(Compile* compile, const char *name, int level=1, bool clear_nodes = false);
+ void print(Compile* compile, const char *name, Node *root, int level=1, bool clear_nodes = false);
+ void print_xml(const char *name);
+
+
+};
+
+#endif
diff --git a/src/share/vm/opto/idealKit.cpp b/src/share/vm/opto/idealKit.cpp
new file mode 100644
index 000000000..ae65319f0
--- /dev/null
+++ b/src/share/vm/opto/idealKit.cpp
@@ -0,0 +1,503 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_idealKit.cpp.incl"
+
+// Static initialization
+
+// This declares the position where vars are kept in the cvstate
+// For some degree of consistency we use the TypeFunc enum to
+// soak up spots in the inputs even though we only use early Control
+// and Memory slots. (So far.)
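+//
+// The resulting cvstate layout (sketch, see new_cvstate() and goto_()):
+//   in(TypeFunc::Control)  - current control
+//   in(TypeFunc::Memory)   - current memory state (a MergeMem)
+//   in(first_var + i)      - current value of the i-th declared variable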
+const uint IdealKit::first_var = TypeFunc::Parms + 1;
+
+//----------------------------IdealKit-----------------------------------------
+IdealKit::IdealKit(PhaseGVN &gvn, Node* control, Node* mem, bool delay_all_transforms) :
+ _gvn(gvn), C(gvn.C) {
+ _initial_ctrl = control;
+ _initial_memory = mem;
+ _delay_all_transforms = delay_all_transforms;
+ _var_ct = 0;
+ _cvstate = NULL;
+ // We can go memory state free or else we need the entire memory state
+ assert(mem == NULL || mem->Opcode() == Op_MergeMem, "memory must be pre-split");
+ int init_size = 5;
+ _pending_cvstates = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
+ _delay_transform = new (C->node_arena()) GrowableArray<Node*>(C->node_arena(), init_size, 0, 0);
+ DEBUG_ONLY(_state = new (C->node_arena()) GrowableArray<int>(C->node_arena(), init_size, 0, 0));
+}
+
+//-------------------------------if_then-------------------------------------
+// Create:   if(left relop right)
+//               /    \
+//         iffalse    iftrue
+// Push the iffalse cvstate onto the stack. The iftrue becomes the current cvstate.
+void IdealKit::if_then(Node* left, BoolTest::mask relop,
+ Node* right, float prob, float cnt, bool push_new_state) {
+ assert((state() & (BlockS|LoopS|IfThenS|ElseS)), "bad state for new If");
+ Node* bol;
+ if (left->bottom_type()->isa_ptr() == NULL) {
+ if (left->bottom_type()->isa_int() != NULL) {
+ bol = Bool(CmpI(left, right), relop);
+ } else {
+ assert(left->bottom_type()->isa_long() != NULL, "what else?");
+ bol = Bool(CmpL(left, right), relop);
+ }
+
+ } else {
+ bol = Bool(CmpP(left, right), relop);
+ }
+ // Delay gvn.transform on if-nodes until construction is finished
+ // to prevent a constant bool input from discarding a control output.
+ IfNode* iff = delay_transform(new (C, 2) IfNode(ctrl(), bol, prob, cnt))->as_If();
+ Node* then = IfTrue(iff);
+ Node* elsen = IfFalse(iff);
+ Node* else_cvstate = copy_cvstate();
+ else_cvstate->set_req(TypeFunc::Control, elsen);
+ _pending_cvstates->push(else_cvstate);
+ DEBUG_ONLY(if (push_new_state) _state->push(IfThenS));
+ set_ctrl(then);
+}
+
+//-------------------------------else_-------------------------------------
+// Pop the else cvstate off the stack, and push the (current) then cvstate.
+// The else cvstate becomes the current cvstate.
+void IdealKit::else_() {
+ assert(state() == IfThenS, "bad state for new Else");
+ Node* else_cvstate = _pending_cvstates->pop();
+ DEBUG_ONLY(_state->pop());
+ // save current (then) cvstate for later use at endif
+ _pending_cvstates->push(_cvstate);
+ DEBUG_ONLY(_state->push(ElseS));
+ _cvstate = else_cvstate;
+}
+
+//-------------------------------end_if-------------------------------------
+// Merge the "then" and "else" cvstates.
+//
+// The if_then() pushed the current state for later use
+// as the initial state for a future "else" clause. The
+// current state then became the initial state for the
+// then clause. If an "else" clause was encountered, it will
+// pop the top state and use it for its initial state.
+// It will also push the current state (the state at the end of
+// the "then" clause) for later use at the end_if.
+//
+// At the endif, the states are:
+// 1) else exists  a) current state is end of "else" clause
+//                 b) top stack state is end of "then" clause
+//
+// 2) no else:     a) current state is end of "then" clause
+//                 b) top stack state is from the "if_then" which
+//                    would have been the initial state of the else.
+//
+// Merging the states is accomplished by:
+// 1) make a label for the merge
+// 2) terminate the current state with a goto to the label
+// 3) pop the top state from the stack and make it the
+// current state
+// 4) bind the label at the current state. Binding a label
+// terminates the current state with a goto to the
+// label and makes the label's state the current state.
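+// (A short usage sketch follows the end_if() implementation below.)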
+//
+void IdealKit::end_if() {
+ assert(state() & (IfThenS|ElseS), "bad state for new Endif");
+ Node* lab = make_label(1);
+
+ // Node* join_state = _pending_cvstates->pop();
+ /* merging, join */
+ goto_(lab);
+ _cvstate = _pending_cvstates->pop();
+
+ bind(lab);
+ DEBUG_ONLY(_state->pop());
+}
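+
+// A minimal usage sketch (hypothetical caller: `kit` is an IdealKit after
+// declares_done(), `a` and `b` are previously built Node* values, and
+// `prob`/`cnt` stand for the usual branch probability/count arguments):
+//
+//   kit.if_then(a, BoolTest::gt, b, prob, cnt); {
+//     // ... statements for the "then" side ...
+//   } kit.else_(); {
+//     // ... statements for the "else" side ...
+//   } kit.end_if();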
+
+//-------------------------------loop-------------------------------------
+// Create the loop head portion (*) of:
+// *    iv = init
+// * top: (region node)
+// *    if (iv relop limit) {
+//        loop body
+//        i = i + 1
+//        goto top
+// *    } else // exits loop
+//
+// Pushes the loop top cvstate first, then the else (loop exit) cvstate
+// onto the stack.
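+// (A short usage sketch follows the end_loop() implementation below.)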
+void IdealKit::loop(IdealVariable& iv, Node* init, BoolTest::mask relop, Node* limit, float prob, float cnt) {
+ assert((state() & (BlockS|LoopS|IfThenS|ElseS)), "bad state for new loop");
+ set(iv, init);
+ Node* head = make_label(1);
+ bind(head);
+ _pending_cvstates->push(head); // push for use at end_loop
+ _cvstate = copy_cvstate();
+ if_then(value(iv), relop, limit, prob, cnt, false /* no new state */);
+ DEBUG_ONLY(_state->push(LoopS));
+ assert(ctrl()->is_IfTrue(), "true branch stays in loop");
+ assert(_pending_cvstates->top()->in(TypeFunc::Control)->is_IfFalse(), "false branch exits loop");
+}
+
+//-------------------------------end_loop-------------------------------------
+// Creates the goto top label.
+// Expects the else (loop exit) cvstate to be on top of the
+// stack, and the loop top cvstate to be 2nd.
+void IdealKit::end_loop() {
+ assert((state() == LoopS), "bad state for new end_loop");
+ Node* exit = _pending_cvstates->pop();
+ Node* head = _pending_cvstates->pop();
+ goto_(head);
+ clear(head);
+ DEBUG_ONLY(_state->pop());
+ _cvstate = exit;
+}
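+
+// A minimal usage sketch (hypothetical caller: `iv` is a declared
+// IdealVariable, `init` and `limit` are Node* values, `prob`/`cnt` as above):
+//
+//   kit.loop(iv, init, BoolTest::lt, limit, prob, cnt); {
+//     // ... loop body: typically reads kit.value(iv) and advances the
+//     //     induction variable with kit.set(iv, ...) ...
+//   } kit.end_loop();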
+
+//-------------------------------make_label-------------------------------------
+// Creates a label. The number of goto's
+// must be specified (which should be 1 less than
+// the number of predecessors.)
+Node* IdealKit::make_label(int goto_ct) {
+ assert(_cvstate != NULL, "must declare variables before labels");
+ Node* lab = new_cvstate();
+ int sz = 1 + goto_ct + 1 /* fall thru */;
+ Node* reg = delay_transform(new (C, sz) RegionNode(sz));
+ lab->init_req(TypeFunc::Control, reg);
+ return lab;
+}
+
+//-------------------------------bind-------------------------------------
+// Bind a label at the current cvstate by simulating
+// a goto to the label.
+void IdealKit::bind(Node* lab) {
+ goto_(lab, true /* bind */);
+ _cvstate = lab;
+}
+
+//-------------------------------goto_-------------------------------------
+// Make the current cvstate a predecessor of the label,
+// creating phi's to merge values. If bind is true and
+// this is not the last control edge, then ensure that
+// all live values have phis created. Used to create phis
+// at loop-top regions.
+void IdealKit::goto_(Node* lab, bool bind) {
+ Node* reg = lab->in(TypeFunc::Control);
+ // find next empty slot in region
+ uint slot = 1;
+ while (slot < reg->req() && reg->in(slot) != NULL) slot++;
+ assert(slot < reg->req(), "too many gotos");
+ // If this is the last predecessor, then don't force phi creation
+ if (slot == reg->req() - 1) bind = false;
+ reg->init_req(slot, ctrl());
+ assert(first_var + _var_ct == _cvstate->req(), "bad _cvstate size");
+ for (uint i = first_var; i < _cvstate->req(); i++) {
+
+ // l is the value of var reaching the label. It could be a single value
+ // reaching the label, or a phi that merges multiple values reaching
+ // the label. The latter is true if the label's input in(i) is
+ // a phi whose control input is the region node for the label.
+
+ Node* l = lab->in(i);
+ // Get the current value of the var
+ Node* m = _cvstate->in(i);
+ // If the var went unused no need for a phi
+ if (m == NULL) {
+ continue;
+ } else if (l == NULL || m == l) {
+ // Only one unique value "m" is known to reach this label so a phi
+ // is not yet necessary unless:
+ // the label is being bound and all predecessors have not been seen,
+ // in which case "bind" will be true.
+ if (bind) {
+ m = promote_to_phi(m, reg);
+ }
+ // Record the phi/value used for this var in the label's cvstate
+ lab->set_req(i, m);
+ } else {
+ // More than one value for the variable reaches this label, so
+ // create a phi if one does not already exist.
+ if (!was_promoted_to_phi(l, reg)) {
+ l = promote_to_phi(l, reg);
+ lab->set_req(i, l);
+ }
+ // Record the var's value from the current state in the phi
+ l->set_req(slot, m);
+ }
+ }
+ do_memory_merge(_cvstate, lab);
+ stop();
+}
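+
+// Illustrative sketch (assumes a label with two incoming gotos): if the first
+// goto_ reaches the label with variable slot i holding value "a" and the
+// second reaches it with value "b", the first goto_ records "a" directly in
+// the label's cvstate and the second promotes it to a phi on the label's region:
+//   lab->in(i) == a                      // after the first goto_
+//   lab->in(i) == Phi(region, a, b)      // after the second goto_ (promote_to_phi)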
+
+//-----------------------------promote_to_phi-----------------------------------
+Node* IdealKit::promote_to_phi(Node* n, Node* reg) {
+ assert(!was_promoted_to_phi(n, reg), "n already promoted to phi on this region");
+ // Get a conservative type for the phi
+ const BasicType bt = n->bottom_type()->basic_type();
+ const Type* ct = Type::get_const_basic_type(bt);
+ return delay_transform(PhiNode::make(reg, n, ct));
+}
+
+//-----------------------------declares_done-----------------------------------
+void IdealKit::declares_done() {
+ _cvstate = new_cvstate(); // initialize current cvstate
+ set_ctrl(_initial_ctrl); // initialize control in current cvstate
+ set_all_memory(_initial_memory);// initialize memory in current cvstate
+ DEBUG_ONLY(_state->push(BlockS));
+}
+
+//-----------------------------transform-----------------------------------
+Node* IdealKit::transform(Node* n) {
+ if (_delay_all_transforms) {
+ return delay_transform(n);
+ } else {
+ return gvn().transform(n);
+ }
+}
+
+//-----------------------------delay_transform-----------------------------------
+Node* IdealKit::delay_transform(Node* n) {
+ gvn().set_type(n, n->bottom_type());
+ _delay_transform->push(n);
+ return n;
+}
+
+//-----------------------------new_cvstate-----------------------------------
+Node* IdealKit::new_cvstate() {
+ uint sz = _var_ct + first_var;
+ return new (C, sz) Node(sz);
+}
+
+//-----------------------------copy_cvstate-----------------------------------
+Node* IdealKit::copy_cvstate() {
+ Node* ns = new_cvstate();
+ for (uint i = 0; i < ns->req(); i++) ns->init_req(i, _cvstate->in(i));
+ // We must clone memory since it will be updated as we do stores.
+ ns->set_req(TypeFunc::Memory, MergeMemNode::make(C, ns->in(TypeFunc::Memory)));
+ return ns;
+}
+
+//-----------------------------clear-----------------------------------
+void IdealKit::clear(Node* m) {
+ for (uint i = 0; i < m->req(); i++) m->set_req(i, NULL);
+}
+
+//-----------------------------drain_delay_transform----------------------------
+void IdealKit::drain_delay_transform() {
+ while (_delay_transform->length() > 0) {
+ Node* n = _delay_transform->pop();
+ gvn().transform(n);
+ if (!gvn().is_IterGVN()) {
+ C->record_for_igvn(n);
+ }
+ }
+}
+
+//-----------------------------IdealVariable----------------------------
+IdealVariable::IdealVariable(IdealKit &k) {
+ k.declare(this);
+}
+
+Node* IdealKit::memory(uint alias_idx) {
+ MergeMemNode* mem = merged_memory();
+ Node* p = mem->memory_at(alias_idx);
+ _gvn.set_type(p, Type::MEMORY); // must be mapped
+ return p;
+}
+
+void IdealKit::set_memory(Node* mem, uint alias_idx) {
+ merged_memory()->set_memory_at(alias_idx, mem);
+}
+
+//----------------------------- load ----------------------------
+Node* IdealKit::load(Node* ctl,
+ Node* adr,
+ const Type* t,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+
+ assert(adr_idx != Compile::AliasIdxTop, "use other make_load factory" );
+ const TypePtr* adr_type = NULL; // debug-mode-only argument
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node* mem = memory(adr_idx);
+ Node* ld;
+ if (require_atomic_access && bt == T_LONG) {
+ ld = LoadLNode::make_atomic(C, ctl, mem, adr, adr_type, t);
+ } else {
+ ld = LoadNode::make(C, ctl, mem, adr, adr_type, t, bt);
+ }
+ return transform(ld);
+}
+
+Node* IdealKit::store(Node* ctl, Node* adr, Node *val, BasicType bt,
+ int adr_idx,
+ bool require_atomic_access) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+ Node* st;
+ if (require_atomic_access && bt == T_LONG) {
+ st = StoreLNode::make_atomic(C, ctl, mem, adr, adr_type, val);
+ } else {
+ st = StoreNode::make(C, ctl, mem, adr, adr_type, val, bt);
+ }
+ st = transform(st);
+ set_memory(st, adr_idx);
+
+ return st;
+}
+
+// Card mark store. Must be ordered so that it will come after the store of
+// the oop.
+Node* IdealKit::storeCM(Node* ctl, Node* adr, Node *val, Node* oop_store,
+ BasicType bt,
+ int adr_idx) {
+ assert(adr_idx != Compile::AliasIdxTop, "use other store_to_memory factory" );
+ const TypePtr* adr_type = NULL;
+ debug_only(adr_type = C->get_adr_type(adr_idx));
+ Node *mem = memory(adr_idx);
+
+ // Add a required edge to oop_store; the optimizer does not support precedence edges.
+ // The required edge is converted to a precedence edge before allocation.
+ Node* st = new (C, 5) StoreCMNode(ctl, mem, adr, adr_type, val, oop_store);
+
+ st = transform(st);
+ set_memory(st, adr_idx);
+
+ return st;
+}
+
+//---------------------------- do_memory_merge --------------------------------
+// The memory from one merging cvstate needs to be merged with the memory for another
+// join cvstate. If the join cvstate doesn't have a merged memory yet then we
+// can just copy the state from the merging cvstate.
+
+// Merge one slow path into the rest of memory.
+void IdealKit::do_memory_merge(Node* merging, Node* join) {
+
+ // Get the region for the join state
+ Node* join_region = join->in(TypeFunc::Control);
+ assert(join_region != NULL, "join region must exist");
+ if (join->in(TypeFunc::Memory) == NULL ) {
+ join->set_req(TypeFunc::Memory, merging->in(TypeFunc::Memory));
+ return;
+ }
+
+ // The control flow for merging must have already been attached to the join region
+ // we need its index for the phis.
+ uint slot;
+ for (slot = 1; slot < join_region->req() ; slot ++ ) {
+ if (join_region->in(slot) == merging->in(TypeFunc::Control)) break;
+ }
+ assert(slot != join_region->req(), "edge must already exist");
+
+ MergeMemNode* join_m = join->in(TypeFunc::Memory)->as_MergeMem();
+ MergeMemNode* merging_m = merging->in(TypeFunc::Memory)->as_MergeMem();
+
+ // join_m should be an ancestor mergemem of merging
+ // Slow path memory comes from the current map (which is from a slow call)
+ // Fast path/null path memory comes from the call's input
+
+ // Merge the other fast-memory inputs with the new slow-default memory.
+ // for (MergeMemStream mms(merged_memory(), fast_mem->as_MergeMem()); mms.next_non_empty2(); ) {
+ for (MergeMemStream mms(join_m, merging_m); mms.next_non_empty2(); ) {
+ Node* join_slice = mms.force_memory();
+ Node* merging_slice = mms.memory2();
+ if (join_slice != merging_slice) {
+ PhiNode* phi;
+ // bool new_phi = false;
+ // Is the phi for this slice one that we created for this join region or simply
+ // one we copied? If it is ours, reuse it and just add the merging slice below;
+ // otherwise create a new phi.
+ if (join_slice->is_Phi() && join_slice->as_Phi()->region() == join_region) {
+ phi = join_slice->as_Phi();
+ } else {
+ // create the phi with join_slice supplying memory for all of the
+ // control edges to the join region
+ phi = PhiNode::make(join_region, join_slice, Type::MEMORY, mms.adr_type(C));
+ phi = (PhiNode*) delay_transform(phi);
+ // gvn().set_type(phi, Type::MEMORY);
+ // new_phi = true;
+ }
+ // Now update the phi with the memory slice from the merging cvstate
+ phi->set_req(slot, merging_slice/* slow_path, slow_slice */);
+ // this updates join_m with the phi
+ mms.set_memory(phi);
+ }
+ }
+}
+
+
+//----------------------------- make_leaf_call ----------------------------
+// Trivial runtime leaf call
+void IdealKit::make_leaf_call(const TypeFunc *slow_call_type,
+ address slow_call,
+ const char *leaf_name,
+ Node* parm0,
+ Node* parm1,
+ Node* parm2) {
+
+ // We only handle taking in RawMem and modifying RawMem
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+ uint adr_idx = C->get_alias_index(adr_type);
+
+ // Clone initial memory
+ MergeMemNode* cloned_mem = MergeMemNode::make(C, merged_memory());
+
+ // Slow-path leaf call
+ int size = slow_call_type->domain()->cnt();
+ CallNode *call = (CallNode*)new (C, size) CallLeafNode( slow_call_type, slow_call, leaf_name, adr_type);
+
+ // Set fixed predefined input arguments
+ call->init_req( TypeFunc::Control, ctrl() );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ // Narrow memory as only memory input
+ call->init_req( TypeFunc::Memory , memory(adr_idx));
+ call->init_req( TypeFunc::FramePtr, top() /* frameptr() */ );
+ call->init_req( TypeFunc::ReturnAdr, top() );
+
+ if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
+ if (parm2 != NULL) call->init_req(TypeFunc::Parms+2, parm2);
+
+ // Node *c = _gvn.transform(call);
+ call = (CallNode *) _gvn.transform(call);
+ Node *c = call; // dbx gets confused with call call->dump()
+
+ // Slow leaf call has no side-effects, sets few values
+
+ set_ctrl(transform( new (C, 1) ProjNode(call,TypeFunc::Control) ));
+
+ // Set the incoming clone of memory as current memory
+ set_all_memory(cloned_mem);
+
+ // Make memory for the call
+ Node* mem = _gvn.transform( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+
+ // Set the RawPtr memory state only.
+ set_memory(mem, adr_idx);
+
+ assert(C->alias_type(call->adr_type()) == C->alias_type(adr_type),
+ "call node must be constructed correctly");
+}
diff --git a/src/share/vm/opto/idealKit.hpp b/src/share/vm/opto/idealKit.hpp
new file mode 100644
index 000000000..5ccdb77b3
--- /dev/null
+++ b/src/share/vm/opto/idealKit.hpp
@@ -0,0 +1,230 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//-----------------------------------------------------------------------------
+//----------------------------IdealKit-----------------------------------------
+// Set of utilities for creating control flow and scalar SSA data flow.
+// Control:
+// if_then(left, relop, right)
+// else_ (optional)
+// end_if
+// loop(iv variable, initial, relop, limit)
+// - sets iv to initial for first trip
+// - exits when relation on limit is true
+// - the values of initial and limit should be loop invariant
+// - no increment, must be explicitly coded
+// - final value of iv is available after end_loop (until dead())
+// end_loop
+// make_label(number of gotos)
+// goto_(label)
+// bind(label)
+// Data:
+// ConI(integer constant) - create an integer constant
+// set(variable, value) - assignment
+// value(variable) - reference value
+// dead(variable) - variable's value is no longer live
+// increment(variable, value) - increment variable by value
+// simple operations: AddI, SubI, AndI, LShiftI, etc.
+// Example:
+// Node* limit = ??
+// IdealVariable i(kit), j(kit);
+// declares_done();
+// Node* exit = make_label(1); // 1 goto
+// set(j, ConI(0));
+// loop(i, ConI(0), BoolTest::lt, limit); {
+// if_then(value(i), BoolTest::gt, ConI(5)) {
+// set(j, ConI(1));
+// goto_(exit); dead(i);
+// } end_if();
+// increment(i, ConI(1));
+// } end_loop(); dead(i);
+// bind(exit);
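+//
+// A second, purely illustrative sketch (same assumed kit and helpers as the
+// example above) showing the if/else form; end_if() merges the two arms and
+// creates phis for any variable set on both sides:
+//   IdealVariable flag(kit);
+//   declares_done();
+//   set(flag, ConI(0));
+//   if_then(value(flag), BoolTest::eq, ConI(0)); {
+//     set(flag, ConI(1));
+//   } else_(); {
+//     set(flag, ConI(2));
+//   } end_if();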
+//
+// See string_indexOf for a more complete example.
+
+class IdealKit;
+
+// Variable definition for IdealKit
+class IdealVariable: public StackObj {
+ friend class IdealKit;
+ private:
+ int _id;
+ void set_id(int id) { _id = id; }
+ public:
+ IdealVariable(IdealKit &k);
+ int id() { assert(has_id(),"uninitialized id"); return _id; }
+ bool has_id() { return _id >= 0; }
+};
+
+class IdealKit: public StackObj {
+ friend class IdealVariable;
+ // The main state (called a cvstate for Control and Variables)
+ // contains both the current values of the variables and the
+ // current set of predecessor control edges. The variable values
+ // are managed via a Node [in(1)..in(_var_ct)], and the predecessor
+ // control edges managed via a RegionNode. The in(0) of the Node
+ // for variables points to the RegionNode for the control edges.
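+ //
+ // Illustrative layout of one cvstate Node (a sketch; only the slots the kit
+ // actually uses are shown, with first_var == TypeFunc::Parms + 1):
+ //   in(TypeFunc::Control)  -> RegionNode merging predecessor control edges
+ //   in(TypeFunc::Memory)   -> memory state (accessed via merged_memory())
+ //   in(first_var + v.id()) -> current value of IdealVariable v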
+ protected:
+ Compile * const C;
+ PhaseGVN &_gvn;
+ GrowableArray<Node*>* _pending_cvstates; // stack of cvstates
+ GrowableArray<Node*>* _delay_transform; // delay invoking gvn.transform until drain
+ Node* _cvstate; // current cvstate (control, memory and variables)
+ uint _var_ct; // number of variables
+ bool _delay_all_transforms; // flag forcing all transforms to be delayed
+ Node* _initial_ctrl; // saves initial control until variables declared
+ Node* _initial_memory; // saves initial memory until variables declared
+
+ PhaseGVN& gvn() const { return _gvn; }
+ // Create a new cvstate filled with nulls
+ Node* new_cvstate(); // Create a new cvstate
+ Node* cvstate() { return _cvstate; } // current cvstate
+ Node* copy_cvstate(); // copy current cvstate
+ void set_ctrl(Node* ctrl) { _cvstate->set_req(TypeFunc::Control, ctrl); }
+
+ // Should this assert this is a MergeMem???
+ void set_all_memory(Node* mem){ _cvstate->set_req(TypeFunc::Memory, mem); }
+ void set_memory(Node* mem, uint alias_idx );
+ void do_memory_merge(Node* merging, Node* join);
+ void clear(Node* m); // clear a cvstate
+ void stop() { clear(_cvstate); } // clear current cvstate
+ Node* delay_transform(Node* n);
+ Node* transform(Node* n); // gvn.transform or push node on delay list
+ Node* promote_to_phi(Node* n, Node* reg);// Promote "n" to a phi on region "reg"
+ bool was_promoted_to_phi(Node* n, Node* reg) {
+ return (n->is_Phi() && n->in(0) == reg);
+ }
+ void declare(IdealVariable* v) { v->set_id(_var_ct++); }
+ // This declares the position where vars are kept in the cvstate
+ // For some degree of consistency we use the TypeFunc enum to
+ // soak up spots in the inputs even though we only use early Control
+ // and Memory slots. (So far.)
+ static const uint first_var; // = TypeFunc::Parms + 1;
+
+#ifdef ASSERT
+ enum State { NullS=0, BlockS=1, LoopS=2, IfThenS=4, ElseS=8, EndifS= 16 };
+ GrowableArray<int>* _state;
+ State state() { return (State)(_state->top()); }
+#endif
+
+ // Users should not care about individual slices, only MergeMem, so no access for them.
+ Node* memory(uint alias_idx);
+
+ public:
+ IdealKit(PhaseGVN &gvn, Node* control, Node* memory, bool delay_all_transforms = false);
+ ~IdealKit() {
+ stop();
+ drain_delay_transform();
+ }
+ // Control
+ Node* ctrl() { return _cvstate->in(TypeFunc::Control); }
+ Node* top() { return C->top(); }
+ MergeMemNode* merged_memory() { return _cvstate->in(TypeFunc::Memory)->as_MergeMem(); }
+ void set(IdealVariable& v, Node* rhs) { _cvstate->set_req(first_var + v.id(), rhs); }
+ Node* value(IdealVariable& v) { return _cvstate->in(first_var + v.id()); }
+ void dead(IdealVariable& v) { set(v, (Node*)NULL); }
+ void if_then(Node* left, BoolTest::mask relop, Node* right,
+ float prob = PROB_FAIR, float cnt = COUNT_UNKNOWN,
+ bool push_new_state = true);
+ void else_();
+ void end_if();
+ void loop(IdealVariable& iv, Node* init, BoolTest::mask cmp, Node* limit,
+ float prob = PROB_LIKELY(0.9), float cnt = COUNT_UNKNOWN);
+ void end_loop();
+ Node* make_label(int goto_ct);
+ void bind(Node* lab);
+ void goto_(Node* lab, bool bind = false);
+ void declares_done();
+ void drain_delay_transform();
+
+ Node* IfTrue(IfNode* iff) { return transform(new (C,1) IfTrueNode(iff)); }
+ Node* IfFalse(IfNode* iff) { return transform(new (C,1) IfFalseNode(iff)); }
+
+ // Data
+ Node* ConI(jint k) { return (Node*)gvn().intcon(k); }
+ Node* makecon(const Type *t) const { return _gvn.makecon(t); }
+
+ Node* AddI(Node* l, Node* r) { return transform(new (C,3) AddINode(l, r)); }
+ Node* SubI(Node* l, Node* r) { return transform(new (C,3) SubINode(l, r)); }
+ Node* AndI(Node* l, Node* r) { return transform(new (C,3) AndINode(l, r)); }
+ Node* MaxI(Node* l, Node* r) { return transform(new (C,3) MaxINode(l, r)); }
+ Node* LShiftI(Node* l, Node* r) { return transform(new (C,3) LShiftINode(l, r)); }
+ Node* CmpI(Node* l, Node* r) { return transform(new (C,3) CmpINode(l, r)); }
+ Node* Bool(Node* cmp, BoolTest::mask relop) { return transform(new (C,2) BoolNode(cmp, relop)); }
+ void increment(IdealVariable& v, Node* j) { set(v, AddI(value(v), j)); }
+ void decrement(IdealVariable& v, Node* j) { set(v, SubI(value(v), j)); }
+
+ Node* CmpL(Node* l, Node* r) { return transform(new (C,3) CmpLNode(l, r)); }
+
+ // TLS
+ Node* thread() { return gvn().transform(new (C, 1) ThreadLocalNode()); }
+
+ // Pointers
+ Node* AddP(Node *base, Node *ptr, Node *off) { return transform(new (C,4) AddPNode(base, ptr, off)); }
+ Node* CmpP(Node* l, Node* r) { return transform(new (C,3) CmpPNode(l, r)); }
+#ifdef _LP64
+ Node* XorX(Node* l, Node* r) { return transform(new (C,3) XorLNode(l, r)); }
+#else // _LP64
+ Node* XorX(Node* l, Node* r) { return transform(new (C,3) XorINode(l, r)); }
+#endif // _LP64
+ Node* URShiftX(Node* l, Node* r) { return transform(new (C,3) URShiftXNode(l, r)); }
+ Node* ConX(jint k) { return (Node*)gvn().MakeConX(k); }
+ Node* CastPX(Node* ctl, Node* p) { return transform(new (C,2) CastP2XNode(ctl, p)); }
+ // Add a fixed offset to a pointer
+ Node* basic_plus_adr(Node* base, Node* ptr, intptr_t offset);
+
+ // Memory operations
+
+ // This is the base version which is given an alias index.
+ Node* load(Node* ctl,
+ Node* adr,
+ const Type* t,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
+
+ // Return the new StoreXNode
+ Node* store(Node* ctl,
+ Node* adr,
+ Node* val,
+ BasicType bt,
+ int adr_idx,
+ bool require_atomic_access = false);
+
+ // Store a card mark ordered after store_oop
+ Node* storeCM(Node* ctl,
+ Node* adr,
+ Node* val,
+ Node* oop_store,
+ BasicType bt,
+ int adr_idx);
+
+ // Trivial call
+ void make_leaf_call(const TypeFunc *slow_call_type,
+ address slow_call,
+ const char *leaf_name,
+ Node* parm0,
+ Node* parm1 = NULL,
+ Node* parm2 = NULL);
+};
diff --git a/src/share/vm/opto/ifg.cpp b/src/share/vm/opto/ifg.cpp
new file mode 100644
index 000000000..2c6cd665f
--- /dev/null
+++ b/src/share/vm/opto/ifg.cpp
@@ -0,0 +1,813 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_ifg.cpp.incl"
+
+#define EXACT_PRESSURE 1
+
+//=============================================================================
+//------------------------------IFG--------------------------------------------
+PhaseIFG::PhaseIFG( Arena *arena ) : Phase(Interference_Graph), _arena(arena) {
+}
+
+//------------------------------init-------------------------------------------
+void PhaseIFG::init( uint maxlrg ) {
+ _maxlrg = maxlrg;
+ _yanked = new (_arena) VectorSet(_arena);
+ _is_square = false;
+ // Make uninitialized adjacency lists
+ _adjs = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*maxlrg);
+ // Also make empty live range structures
+ _lrgs = (LRG *)_arena->Amalloc( maxlrg * sizeof(LRG) );
+ memset(_lrgs,0,sizeof(LRG)*maxlrg);
+ // Init all to empty
+ for( uint i = 0; i < maxlrg; i++ ) {
+ _adjs[i].initialize(maxlrg);
+ _lrgs[i].Set_All();
+ }
+}
+
+//------------------------------add--------------------------------------------
+// Add edge between vertices a & b. These are sorted (triangular matrix),
+// then the smaller number is inserted in the larger numbered array.
+int PhaseIFG::add_edge( uint a, uint b ) {
+ lrgs(a).invalid_degree();
+ lrgs(b).invalid_degree();
+ // Sort a and b, so that a is bigger
+ assert( !_is_square, "only on triangular" );
+ if( a < b ) { uint tmp = a; a = b; b = tmp; }
+ return _adjs[a].insert( b );
+}
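+
+// Illustrative sketch: with this triangular representation, add_edge(3, 7)
+// and add_edge(7, 3) both record the edge as _adjs[7].insert(3); the smaller
+// index always lands in the adjacency set of the larger index.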
+
+//------------------------------add_vector-------------------------------------
+// Add an edge between 'a' and everything in the vector.
+void PhaseIFG::add_vector( uint a, IndexSet *vec ) {
+ // IFG is triangular, so do the inserts where 'a' < 'b'.
+ assert( !_is_square, "only on triangular" );
+ IndexSet *adjs_a = &_adjs[a];
+ if( !vec->count() ) return;
+
+ IndexSetIterator elements(vec);
+ uint neighbor;
+ while ((neighbor = elements.next()) != 0) {
+ add_edge( a, neighbor );
+ }
+}
+
+//------------------------------test-------------------------------------------
+// Is there an edge between a and b?
+int PhaseIFG::test_edge( uint a, uint b ) const {
+ // Sort a and b, so that a is larger
+ assert( !_is_square, "only on triangular" );
+ if( a < b ) { uint tmp = a; a = b; b = tmp; }
+ return _adjs[a].member(b);
+}
+
+//------------------------------SquareUp---------------------------------------
+// Convert triangular matrix to square matrix
+void PhaseIFG::SquareUp() {
+ assert( !_is_square, "only on triangular" );
+
+ // Simple transpose
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].insert( i );
+ }
+ }
+ _is_square = true;
+}
+
+//------------------------------Compute_Effective_Degree-----------------------
+// Compute effective degree in bulk
+void PhaseIFG::Compute_Effective_Degree() {
+ assert( _is_square, "only on square" );
+
+ for( uint i = 0; i < _maxlrg; i++ )
+ lrgs(i).set_degree(effective_degree(i));
+}
+
+//------------------------------test_edge_sq-----------------------------------
+int PhaseIFG::test_edge_sq( uint a, uint b ) const {
+ assert( _is_square, "only on square" );
+ // Swap, so that 'a' has the lesser count. Then binary search is on
+ // the smaller of a's list and b's list.
+ if( neighbor_cnt(a) > neighbor_cnt(b) ) { uint tmp = a; a = b; b = tmp; }
+ //return _adjs[a].unordered_member(b);
+ return _adjs[a].member(b);
+}
+
+//------------------------------Union------------------------------------------
+// Union edges of B into A
+void PhaseIFG::Union( uint a, uint b ) {
+ assert( _is_square, "only on square" );
+ IndexSet *A = &_adjs[a];
+ IndexSetIterator b_elements(&_adjs[b]);
+ uint datum;
+ while ((datum = b_elements.next()) != 0) {
+ if(A->insert(datum)) {
+ _adjs[datum].insert(a);
+ lrgs(a).invalid_degree();
+ lrgs(datum).invalid_degree();
+ }
+ }
+}
+
+//------------------------------remove_node------------------------------------
+// Yank a Node and all connected edges from the IFG. Return a
+// list of neighbors (edges) yanked.
+IndexSet *PhaseIFG::remove_node( uint a ) {
+ assert( _is_square, "only on square" );
+ assert( !_yanked->test(a), "" );
+ _yanked->set(a);
+
+ // I remove the LRG from all neighbors.
+ IndexSetIterator elements(&_adjs[a]);
+ LRG &lrg_a = lrgs(a);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].remove(a);
+ lrgs(datum).inc_degree( -lrg_a.compute_degree(lrgs(datum)) );
+ }
+ return neighbors(a);
+}
+
+//------------------------------re_insert--------------------------------------
+// Re-insert a yanked Node.
+void PhaseIFG::re_insert( uint a ) {
+ assert( _is_square, "only on square" );
+ assert( _yanked->test(a), "" );
+ (*_yanked) >>= a;
+
+ IndexSetIterator elements(&_adjs[a]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ _adjs[datum].insert(a);
+ lrgs(datum).invalid_degree();
+ }
+}
+
+//------------------------------compute_degree---------------------------------
+// Compute the degree between 2 live ranges. If both live ranges are
+// aligned-adjacent powers-of-2 then we use the MAX size. If either is
+// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
+// MULTIPLY the sizes. Inspect Briggs' thesis on register pairs to see why
+// this is so.
+int LRG::compute_degree( LRG &l ) const {
+ int tmp;
+ int num_regs = _num_regs;
+ int nregs = l.num_regs();
+ tmp = (_fat_proj || l._fat_proj) // either is a fat-proj?
+ ? (num_regs * nregs) // then use product
+ : MAX2(num_regs,nregs); // else use max
+ return tmp;
+}
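+
+// Illustrative worked example (numbers assumed): two aligned register pairs
+// (2 regs each, neither a fat-proj) contribute MAX2(2,2) = 2 to each other's
+// degree, while a 2-reg fat-proj against a 2-reg pair contributes 2*2 = 4,
+// reflecting the stronger constraint described above.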
+
+//------------------------------effective_degree-------------------------------
+// Compute effective degree for this live range. If both live ranges are
+// aligned-adjacent powers-of-2 then we use the MAX size. If either is
+// mis-aligned (or for Fat-Projections, not-adjacent) then we have to
+// MULTIPLY the sizes. Inspect Briggs' thesis on register pairs to see why
+// this is so.
+int PhaseIFG::effective_degree( uint lidx ) const {
+ int eff = 0;
+ int num_regs = lrgs(lidx).num_regs();
+ int fat_proj = lrgs(lidx)._fat_proj;
+ IndexSet *s = neighbors(lidx);
+ IndexSetIterator elements(s);
+ uint nidx;
+ while((nidx = elements.next()) != 0) {
+ LRG &lrgn = lrgs(nidx);
+ int nregs = lrgn.num_regs();
+ eff += (fat_proj || lrgn._fat_proj) // either is a fat-proj?
+ ? (num_regs * nregs) // then use product
+ : MAX2(num_regs,nregs); // else use max
+ }
+ return eff;
+}
+
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+void PhaseIFG::dump() const {
+ tty->print_cr("-- Interference Graph --%s--",
+ _is_square ? "square" : "triangular" );
+ if( _is_square ) {
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ tty->print( (*_yanked)[i] ? "XX " : " ");
+ tty->print("L%d: { ",i);
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ tty->print("L%d ", datum);
+ }
+ tty->print_cr("}");
+
+ }
+ return;
+ }
+
+ // Triangular
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ uint j;
+ tty->print( (*_yanked)[i] ? "XX " : " ");
+ tty->print("L%d: { ",i);
+ for( j = _maxlrg; j > i; j-- )
+ if( test_edge(j - 1,i) ) {
+ tty->print("L%d ",j - 1);
+ }
+ tty->print("| ");
+ IndexSetIterator elements(&_adjs[i]);
+ uint datum;
+ while ((datum = elements.next()) != 0) {
+ tty->print("L%d ", datum);
+ }
+ tty->print("}\n");
+ }
+ tty->print("\n");
+}
+
+//------------------------------stats------------------------------------------
+void PhaseIFG::stats() const {
+ ResourceMark rm;
+ int *h_cnt = NEW_RESOURCE_ARRAY(int,_maxlrg*2);
+ memset( h_cnt, 0, sizeof(int)*_maxlrg*2 );
+ uint i;
+ for( i = 0; i < _maxlrg; i++ ) {
+ h_cnt[neighbor_cnt(i)]++;
+ }
+ tty->print_cr("--Histogram of counts--");
+ for( i = 0; i < _maxlrg*2; i++ )
+ if( h_cnt[i] )
+ tty->print("%d/%d ",i,h_cnt[i]);
+ tty->print_cr("");
+}
+
+//------------------------------verify-----------------------------------------
+void PhaseIFG::verify( const PhaseChaitin *pc ) const {
+ // IFG is square, sorted and no need for Find
+ for( uint i = 0; i < _maxlrg; i++ ) {
+ assert(!((*_yanked)[i]) || !neighbor_cnt(i), "Is removed completely" );
+ IndexSet *set = &_adjs[i];
+ IndexSetIterator elements(set);
+ uint idx;
+ uint last = 0;
+ while ((idx = elements.next()) != 0) {
+ assert( idx != i, "Must have empty diagonal");
+ assert( pc->Find_const(idx) == idx, "Must not need Find" );
+ assert( _adjs[idx].member(i), "IFG not square" );
+ assert( !(*_yanked)[idx], "No yanked neighbors" );
+ assert( last < idx, "not sorted increasing");
+ last = idx;
+ }
+ assert( !lrgs(i)._degree_valid ||
+ effective_degree(i) == lrgs(i).degree(), "degree is valid but wrong" );
+ }
+}
+#endif
+
+//------------------------------interfere_with_live----------------------------
+// Interfere this register with everything currently live. Use the RegMasks
+// to trim the set of possible interferences. Return a count of register-only
+// interferences as an estimate of register pressure.
+void PhaseChaitin::interfere_with_live( uint r, IndexSet *liveout ) {
+ uint retval = 0;
+ // Interfere with everything live.
+ const RegMask &rm = lrgs(r).mask();
+ // Check for interference by checking overlap of regmasks.
+ // Only interfere if acceptable register masks overlap.
+ IndexSetIterator elements(liveout);
+ uint l;
+ while( (l = elements.next()) != 0 )
+ if( rm.overlap( lrgs(l).mask() ) )
+ _ifg->add_edge( r, l );
+}
+
+//------------------------------build_ifg_virtual------------------------------
+// Actually build the interference graph. Uses virtual registers only, no
+// physical register masks. This allows me to be very aggressive when
+// coalescing copies. Some of this aggressiveness will have to be undone
+// later, but I'd rather get all the copies I can now (since unremoved copies
+// at this point can end up in bad places). Copies I re-insert later give me
+// more opportunity to place them in low-frequency locations.
+void PhaseChaitin::build_ifg_virtual( ) {
+
+ // For all blocks (in any order) do...
+ for( uint i=0; i<_cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ IndexSet *liveout = _live->live(b);
+
+ // The IFG is built by a single reverse pass over each basic block.
+ // Starting with the known live-out set, we remove things that get
+ // defined and add things that become live (essentially executing one
+ // pass of a standard LIVE analysis). Just before a Node defines a value
+ // (and removes it from the live-ness set) that value is certainly live.
+ // The defined value interferes with everything currently live. The
+// value is then removed from the live-ness set and its inputs are
+ // added to the live-ness set.
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+
+ // Get value being defined
+ uint r = n2lidx(n);
+
+ // Some special values do not allocate
+ if( r ) {
+
+ // Remove from live-out set
+ liveout->remove(r);
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) liveout->remove( n2lidx(n->in(idx)) );
+
+ // Interfere with everything live
+ interfere_with_live( r, liveout );
+ }
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ for( uint k = 1; k < n->req(); k++ )
+ liveout->insert( n2lidx(n->in(k)) );
+ }
+
+ // 2-address instructions always have the defined value live
+ // on entry to the instruction, even though it is being defined
+ // by the instruction. We pretend a virtual copy sits just prior
+ // to the instruction and kills the src-def'd register.
+ // In other words, for 2-address instructions the defined value
+ // interferes with all inputs.
+ uint idx;
+ if( n->is_Mach() && (idx = n->as_Mach()->two_adr()) ) {
+ const MachNode *mach = n->as_Mach();
+ // Sometimes my 2-address ADDs are commuted in a bad way.
+ // We generally want the USE-DEF register to refer to the
+ // loop-varying quantity, to avoid a copy.
+ uint op = mach->ideal_Opcode();
+ // Check that mach->num_opnds() == 3 to ensure instruction is
+ // not subsuming constants, effectively excludes addI_cin_imm
+ // Can NOT swap for instructions like addI_cin_imm since it
+ // is adding zero to yhi + carry and the second ideal-input
+ // points to the result of adding low-halves.
+ // Checking req() and num_opnds() does NOT distinguish addI_cout from addI_cout_imm
+ if( (op == Op_AddI && mach->req() == 3 && mach->num_opnds() == 3) &&
+ n->in(1)->bottom_type()->base() == Type::Int &&
+ // See if the ADD is involved in a tight data loop the wrong way
+ n->in(2)->is_Phi() &&
+ n->in(2)->in(2) == n ) {
+ Node *tmp = n->in(1);
+ n->set_req( 1, n->in(2) );
+ n->set_req( 2, tmp );
+ }
+ // Defined value interferes with all inputs
+ uint lidx = n2lidx(n->in(idx));
+ for( uint k = 1; k < n->req(); k++ ) {
+ uint kidx = n2lidx(n->in(k));
+ if( kidx != lidx )
+ _ifg->add_edge( r, kidx );
+ }
+ }
+ } // End of forall instructions in block
+ } // End of forall blocks
+}
+
+//------------------------------count_int_pressure-----------------------------
+uint PhaseChaitin::count_int_pressure( IndexSet *liveout ) {
+ IndexSetIterator elements(liveout);
+ uint lidx;
+ uint cnt = 0;
+ while ((lidx = elements.next()) != 0) {
+ if( lrgs(lidx).mask().is_UP() &&
+ lrgs(lidx).mask_size() &&
+ !lrgs(lidx)._is_float &&
+ lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) )
+ cnt += lrgs(lidx).reg_pressure();
+ }
+ return cnt;
+}
+
+//------------------------------count_float_pressure---------------------------
+uint PhaseChaitin::count_float_pressure( IndexSet *liveout ) {
+ IndexSetIterator elements(liveout);
+ uint lidx;
+ uint cnt = 0;
+ while ((lidx = elements.next()) != 0) {
+ if( lrgs(lidx).mask().is_UP() &&
+ lrgs(lidx).mask_size() &&
+ lrgs(lidx)._is_float )
+ cnt += lrgs(lidx).reg_pressure();
+ }
+ return cnt;
+}
+
+//------------------------------lower_pressure---------------------------------
+// Adjust register pressure down by the live range's register pressure.
+// Capture the last hi-to-lo pressure transition.
+static void lower_pressure( LRG *lrg, uint where, Block *b, uint *pressure, uint *hrp_index ) {
+ if( lrg->mask().is_UP() && lrg->mask_size() ) {
+ if( lrg->_is_float ) {
+ pressure[1] -= lrg->reg_pressure();
+ if( pressure[1] == (uint)FLOATPRESSURE ) {
+ hrp_index[1] = where;
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1]+1;
+#else
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ }
+ } else if( lrg->mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] -= lrg->reg_pressure();
+ if( pressure[0] == (uint)INTPRESSURE ) {
+ hrp_index[0] = where;
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0]+1;
+#else
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ }
+ }
+ }
+}
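+
+// Illustrative worked example (numbers assumed): with INTPRESSURE == 6, if an
+// integer live range with reg_pressure() == 1 dies at instruction index j and
+// pressure[0] drops from 7 to 6, hrp_index[0] is set to j, recording the last
+// point in the block where int pressure fell back to the threshold.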
+
+//------------------------------build_ifg_physical-----------------------------
+// Build the interference graph using physical registers when available.
+// That is, if 2 live ranges are simultaneously alive but in their acceptable
+// register sets do not overlap, then they do not interfere.
+uint PhaseChaitin::build_ifg_physical( ResourceArea *a ) {
+ NOT_PRODUCT( Compile::TracePhase t3("buildIFG", &_t_buildIFGphysical, TimeCompiler); )
+
+ uint spill_reg = LRG::SPILL_REG;
+ uint must_spill = 0;
+
+ // For all blocks (in any order) do...
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ // Clone (rather than smash in place) the liveout info, so it is alive
+ // for the "collect_gc_info" phase later.
+ IndexSet liveout(_live->live(b));
+ uint last_inst = b->end_idx();
+ // Compute last phi index
+ uint last_phi;
+ for( last_phi = 1; last_phi < last_inst; last_phi++ )
+ if( !b->_nodes[last_phi]->is_Phi() )
+ break;
+
+ // Reset block's register pressure values for each ifg construction
+ uint pressure[2], hrp_index[2];
+ pressure[0] = pressure[1] = 0;
+ hrp_index[0] = hrp_index[1] = last_inst+1;
+ b->_reg_pressure = b->_freg_pressure = 0;
+ // Liveout things are presumed live for the whole block. We accumulate
+ // 'area' accordingly. If they get killed in the block, we'll subtract
+ // the unused part of the block from the area.
+ double cost = b->_freq * double(last_inst-last_phi);
+ assert( cost >= 0, "negative spill cost" );
+ IndexSetIterator elements(&liveout);
+ uint lidx;
+ while ((lidx = elements.next()) != 0) {
+ LRG &lrg = lrgs(lidx);
+ lrg._area += cost;
+ // Compute initial register pressure
+ if( lrg.mask().is_UP() && lrg.mask_size() ) {
+ if( lrg._is_float ) { // Count float pressure
+ pressure[1] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#endif
+ // Count int pressure, but do not count the SP, flags
+ } else if( lrgs(lidx).mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#endif
+ }
+ }
+ }
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+
+ // The IFG is built by a single reverse pass over each basic block.
+ // Starting with the known live-out set, we remove things that get
+ // defined and add things that become live (essentially executing one
+ // pass of a standard LIVE analysis). Just before a Node defines a value
+ // (and removes it from the live-ness set) that value is certainly live.
+ // The defined value interferes with everything currently live. The
+// value is then removed from the live-ness set and its inputs are added
+ // to the live-ness set.
+ uint j;
+ for( j = last_inst + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j - 1];
+
+ // Get value being defined
+ uint r = n2lidx(n);
+
+ // Some special values do not allocate
+ if( r ) {
+ // A DEF normally costs block frequency; rematerialized values are
+ // removed from the DEF site, so LOWER costs here.
+ lrgs(r)._cost += n->rematerialize() ? 0 : b->_freq;
+
+ // If it is not live, then this instruction is dead. Probably caused
+ // by spilling and rematerialization. Who cares why, yank this baby.
+ if( !liveout.member(r) && n->Opcode() != Op_SafePoint ) {
+ Node *def = n->in(0);
+ if( !n->is_Proj() ||
+ // Could also be a flags-projection of a dead ADD or such.
+ (n2lidx(def) && !liveout.member(n2lidx(def)) ) ) {
+ b->_nodes.remove(j - 1);
+ if( lrgs(r)._def == n ) lrgs(r)._def = 0;
+ n->disconnect_inputs(NULL);
+ _cfg._bbs.map(n->_idx,NULL);
+ n->replace_by(C->top());
+ // Since yanking a Node from block, high pressure moves up one
+ hrp_index[0]--;
+ hrp_index[1]--;
+ continue;
+ }
+
+ // Fat-projections kill many registers which cannot be used to
+ // hold live ranges.
+ if( lrgs(r)._fat_proj ) {
+ // Count the int-only registers
+ RegMask itmp = lrgs(r).mask();
+ itmp.AND(*Matcher::idealreg2regmask[Op_RegI]);
+ int iregs = itmp.Size();
+#ifdef EXACT_PRESSURE
+ if( pressure[0]+iregs > b->_reg_pressure )
+ b->_reg_pressure = pressure[0]+iregs;
+#endif
+ if( pressure[0] <= (uint)INTPRESSURE &&
+ pressure[0]+iregs > (uint)INTPRESSURE ) {
+#ifndef EXACT_PRESSURE
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ hrp_index[0] = j-1;
+ }
+ // Count the float-only registers
+ RegMask ftmp = lrgs(r).mask();
+ ftmp.AND(*Matcher::idealreg2regmask[Op_RegD]);
+ int fregs = ftmp.Size();
+#ifdef EXACT_PRESSURE
+ if( pressure[1]+fregs > b->_freg_pressure )
+ b->_freg_pressure = pressure[1]+fregs;
+#endif
+ if( pressure[1] <= (uint)FLOATPRESSURE &&
+ pressure[1]+fregs > (uint)FLOATPRESSURE ) {
+#ifndef EXACT_PRESSURE
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ hrp_index[1] = j-1;
+ }
+ }
+
+ } else { // Else it is live
+ // A DEF also ends 'area' partway through the block.
+ lrgs(r)._area -= cost;
+ assert( lrgs(r)._area >= 0, "negative spill area" );
+
+ // Ensure high score for immediate-use spill copies so they get a color
+ if( n->is_SpillCopy()
+ && lrgs(r)._def != NodeSentinel // MultiDef live range can still split
+ && n->outcnt() == 1 // and use must be in this block
+ && _cfg._bbs[n->unique_out()->_idx] == b ) {
+ // All single-use MachSpillCopy(s) that immediately precede their
+ // use must color early. If a longer live range steals their
+ // color, the spill copy will split and may push another spill copy
+ // further away resulting in an infinite spill-split-retry cycle.
+ // Assigning a zero area results in a high score() and a good
+ // location in the simplify list.
+ //
+
+ Node *single_use = n->unique_out();
+ assert( b->find_node(single_use) >= j, "Use must be later in block");
+ // Use can be earlier in block if it is a Phi, but then I should be a MultiDef
+
+ // Find first non SpillCopy 'm' that follows the current instruction
+ // (j - 1) is index for current instruction 'n'
+ Node *m = n;
+ for( uint i = j; i <= last_inst && m->is_SpillCopy(); ++i ) { m = b->_nodes[i]; }
+ if( m == single_use ) {
+ lrgs(r)._area = 0.0;
+ }
+ }
+
+ // Remove from live-out set
+ if( liveout.remove(r) ) {
+ // Adjust register pressure.
+ // Capture last hi-to-lo pressure transition
+ lower_pressure( &lrgs(r), j-1, b, pressure, hrp_index );
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+
+ // Copies do not define a new value and so do not interfere.
+ // Remove the copy's source from the liveout set before interfering.
+ uint idx = n->is_Copy();
+ if( idx ) {
+ uint x = n2lidx(n->in(idx));
+ if( liveout.remove( x ) ) {
+ lrgs(x)._area -= cost;
+ // Adjust register pressure.
+ lower_pressure( &lrgs(x), j-1, b, pressure, hrp_index );
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+ }
+ } // End of if live or not
+
+ // Interfere with everything live. If the defined value must
+ // go in a particular register, just remove that register from
+ // all conflicting parties and avoid the interference.
+
+ // Make exclusions for rematerializable defs. Since rematerializable
+ // DEFs are not bound but the live range is, some uses must be bound.
+ // If we spill live range 'r', it can rematerialize at each use site
+ // according to its bindings.
+ const RegMask &rmask = lrgs(r).mask();
+ if( lrgs(r).is_bound() && !(n->rematerialize()) && rmask.is_NotEmpty() ) {
+ // Smear odd bits; leave only aligned pairs of bits.
+ RegMask r2mask = rmask;
+ r2mask.SmearToPairs();
+ // Check for common case
+ int r_size = lrgs(r).num_regs();
+ OptoReg::Name r_reg = (r_size == 1) ? rmask.find_first_elem() : OptoReg::Physical;
+
+ IndexSetIterator elements(&liveout);
+ uint l;
+ while ((l = elements.next()) != 0) {
+ LRG &lrg = lrgs(l);
+ // If 'l' must spill already, do not further hack his bits.
+ // He'll get some interferences and be forced to spill later.
+ if( lrg._must_spill ) continue;
+ // Remove bound register(s) from 'l's choices
+ RegMask old = lrg.mask();
+ uint old_size = lrg.mask_size();
+ // Remove the bits from LRG 'r' from LRG 'l' so 'l' no
+ // longer interferes with 'r'. If 'l' requires aligned
+ // adjacent pairs, subtract out bit pairs.
+ if( lrg.num_regs() == 2 && !lrg._fat_proj ) {
+ lrg.SUBTRACT( r2mask );
+ lrg.compute_set_mask_size();
+ } else if( r_size != 1 ) {
+ lrg.SUBTRACT( rmask );
+ lrg.compute_set_mask_size();
+ } else { // Common case: size 1 bound removal
+ if( lrg.mask().Member(r_reg) ) {
+ lrg.Remove(r_reg);
+ lrg.set_mask_size(lrg.mask().is_AllStack() ? 65535:old_size-1);
+ }
+ }
+ // If 'l' goes completely dry, it must spill.
+ if( lrg.not_free() ) {
+ // Give 'l' some kind of reasonable mask, so he picks up
+ // interferences (and will spill later).
+ lrg.set_mask( old );
+ lrg.set_mask_size(old_size);
+ must_spill++;
+ lrg._must_spill = 1;
+ lrg.set_reg(OptoReg::Name(LRG::SPILL_REG));
+ }
+ }
+ } // End of if bound
+
+ // Now interfere with everything that is live and has
+ // compatible register sets.
+ interfere_with_live(r,&liveout);
+
+ } // End of if normal register-allocated value
+
+ cost -= b->_freq; // Area remaining in the block
+ if( cost < 0.0 ) cost = 0.0; // Cost goes negative in the Phi area
+
+ // Make all inputs live
+ if( !n->is_Phi() ) { // Phi function uses come from prior block
+ JVMState* jvms = n->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ // Start loop at 1 (skip control edge) for most Nodes.
+ // SCMemProj's might be the sole use of a StoreLConditional.
+ // While StoreLConditionals set memory (the SCMemProj use)
+ // they also def flags; if that flag def is unused the
+ // allocator sees a flag-setting instruction with no use of
+ // the flags and assumes it's dead. This keeps the (useless)
+ // flag-setting behavior alive while also keeping the (useful)
+ // memory update effect.
+ for( uint k = ((n->Opcode() == Op_SCMemProj) ? 0:1); k < n->req(); k++ ) {
+ Node *def = n->in(k);
+ uint x = n2lidx(def);
+ if( !x ) continue;
+ LRG &lrg = lrgs(x);
+ // No use-side cost for spilling debug info
+ if( k < debug_start )
+ // A USE costs twice block frequency (once for the Load, once
+ // for a Load-delay). Rematerialized uses only cost once.
+ lrg._cost += (def->rematerialize() ? b->_freq : (b->_freq + b->_freq));
+ // It is live now
+ if( liveout.insert( x ) ) {
+ // Newly live things assumed live from here to top of block
+ lrg._area += cost;
+ // Adjust register pressure
+ if( lrg.mask().is_UP() && lrg.mask_size() ) {
+ if( lrg._is_float ) {
+ pressure[1] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#endif
+ } else if( lrg.mask().overlap(*Matcher::idealreg2regmask[Op_RegI]) ) {
+ pressure[0] += lrg.reg_pressure();
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#endif
+ }
+ }
+ assert( pressure[0] == count_int_pressure (&liveout), "" );
+ assert( pressure[1] == count_float_pressure(&liveout), "" );
+ }
+ assert( lrg._area >= 0, "negative spill area" );
+ }
+ }
+ } // End of reverse pass over all instructions in block
+
+ // If we run off the top of the block with high pressure and
+ // never see a hi-to-low pressure transition, just record that
+ // the whole block is high pressure.
+ if( pressure[0] > (uint)INTPRESSURE ) {
+ hrp_index[0] = 0;
+#ifdef EXACT_PRESSURE
+ if( pressure[0] > b->_reg_pressure )
+ b->_reg_pressure = pressure[0];
+#else
+ b->_reg_pressure = (uint)INTPRESSURE+1;
+#endif
+ }
+ if( pressure[1] > (uint)FLOATPRESSURE ) {
+ hrp_index[1] = 0;
+#ifdef EXACT_PRESSURE
+ if( pressure[1] > b->_freg_pressure )
+ b->_freg_pressure = pressure[1];
+#else
+ b->_freg_pressure = (uint)FLOATPRESSURE+1;
+#endif
+ }
+
+ // Compute the high pressure index; avoid landing in the middle of projnodes
+ j = hrp_index[0];
+ if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
+ Node *cur = b->_nodes[j];
+ while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+ j--;
+ cur = b->_nodes[j];
+ }
+ }
+ b->_ihrp_index = j;
+ j = hrp_index[1];
+ if( j < b->_nodes.size() && j < b->end_idx()+1 ) {
+ Node *cur = b->_nodes[j];
+ while( cur->is_Proj() || (cur->is_MachNullCheck()) || cur->is_Catch() ) {
+ j--;
+ cur = b->_nodes[j];
+ }
+ }
+ b->_fhrp_index = j;
+
+#ifndef PRODUCT
+ // Gather Register Pressure Statistics
+ if( PrintOptoStatistics ) {
+ if( b->_reg_pressure > (uint)INTPRESSURE || b->_freg_pressure > (uint)FLOATPRESSURE )
+ _high_pressure++;
+ else
+ _low_pressure++;
+ }
+#endif
+ } // End of for all blocks
+
+ return must_spill;
+}
diff --git a/src/share/vm/opto/ifnode.cpp b/src/share/vm/opto/ifnode.cpp
new file mode 100644
index 000000000..48031ef3d
--- /dev/null
+++ b/src/share/vm/opto/ifnode.cpp
@@ -0,0 +1,922 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_ifnode.cpp.incl"
+
+
+extern int explicit_null_checks_elided;
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Return a tuple for whichever arm of the IF is reachable
+const Type *IfNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ if( phase->type(in(0)) == Type::TOP )
+ return Type::TOP;
+ const Type *t = phase->type(in(1));
+ if( t == Type::TOP ) // data is undefined
+ return TypeTuple::IFNEITHER; // unreachable altogether
+ if( t == TypeInt::ZERO ) // zero, or false
+ return TypeTuple::IFFALSE; // only false branch is reachable
+ if( t == TypeInt::ONE ) // 1, or true
+ return TypeTuple::IFTRUE; // only true branch is reachable
+ assert( t == TypeInt::BOOL, "expected boolean type" );
+
+ return TypeTuple::IFBOTH; // No progress
+}
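+
+// Illustrative sketch of the folding above (a sketch, not new behavior): if
+// the test input in(1) is known to be TypeInt::ONE, Value() returns
+// TypeTuple::IFTRUE, so only the true projection stays reachable and the
+// false arm can be optimized away.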
+
+const RegMask &IfNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------split_if---------------------------------------
+// Look for places where we merge constants, then test on the merged value.
+// If the IF test will be constant folded on the path with the constant, we
+// win by splitting the IF to before the merge point.
+static Node* split_if(IfNode *iff, PhaseIterGVN *igvn) {
+ // I could be a lot more general here, but I'm trying to squeeze this
+ // in before the Christmas '98 break so I'm gonna be kinda restrictive
+ // on the patterns I accept. CNC
+
+ // Look for a compare of a constant and a merged value
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) return NULL;
+ BoolNode *b = i1->as_Bool();
+ Node *cmp = b->in(1);
+ if( !cmp->is_Cmp() ) return NULL;
+ i1 = cmp->in(1);
+ if( i1 == NULL || !i1->is_Phi() ) return NULL;
+ PhiNode *phi = i1->as_Phi();
+ if( phi->is_copy() ) return NULL;
+ Node *con2 = cmp->in(2);
+ if( !con2->is_Con() ) return NULL;
+ // See that the merge point contains some constants
+ Node *con1=NULL;
+ uint i4;
+ for( i4 = 1; i4 < phi->req(); i4++ ) {
+ con1 = phi->in(i4);
+ if( !con1 ) return NULL; // Do not optimize partially collapsed merges
+ if( con1->is_Con() ) break; // Found a constant
+ // Also allow null-vs-not-null checks
+ const TypePtr *tp = igvn->type(con1)->isa_ptr();
+ if( tp && tp->_ptr == TypePtr::NotNull )
+ break;
+ }
+ if( i4 >= phi->req() ) return NULL; // Found no constants
+
+ igvn->C->set_has_split_ifs(true); // Has chance for split-if
+
+ // Make sure that the compare can be constant folded away
+ Node *cmp2 = cmp->clone();
+ cmp2->set_req(1,con1);
+ cmp2->set_req(2,con2);
+ const Type *t = cmp2->Value(igvn);
+ // This compare is dead, so whack it!
+ igvn->remove_dead_node(cmp2);
+ if( !t->singleton() ) return NULL;
+
+ // No intervening control, like a simple Call
+ Node *r = iff->in(0);
+ if( !r->is_Region() ) return NULL;
+ if( phi->region() != r ) return NULL;
+ // No other users of the cmp/bool
+ if (b->outcnt() != 1 || cmp->outcnt() != 1) {
+ //tty->print_cr("many users of cmp/bool");
+ return NULL;
+ }
+
+ // Make sure we can determine where all the uses of merged values go
+ for (DUIterator_Fast jmax, j = r->fast_outs(jmax); j < jmax; j++) {
+ Node* u = r->fast_out(j);
+ if( u == r ) continue;
+ if( u == iff ) continue;
+ if( u->outcnt() == 0 ) continue; // use is dead & ignorable
+ if( !u->is_Phi() ) {
+ /*
+ if( u->is_Start() ) {
+ tty->print_cr("Region has inlined start use");
+ } else {
+ tty->print_cr("Region has odd use");
+ u->dump(2);
+ }*/
+ return NULL;
+ }
+ if( u != phi ) {
+ // CNC - do not allow any other merged value
+ //tty->print_cr("Merging another value");
+ //u->dump(2);
+ return NULL;
+ }
+ // Make sure we can account for all Phi uses
+ for (DUIterator_Fast kmax, k = u->fast_outs(kmax); k < kmax; k++) {
+ Node* v = u->fast_out(k); // User of the phi
+ // CNC - Allow only really simple patterns.
+ // In particular I disallow AddP of the Phi, a fairly common pattern
+ if( v == cmp ) continue; // The compare is OK
+ if( (v->is_ConstraintCast()) &&
+ v->in(0)->in(0) == iff )
+ continue; // CastPP/II of the IfNode is OK
+ // Disabled following code because I cannot tell if exactly one
+ // path dominates without a real dominator check. CNC 9/9/1999
+ //uint vop = v->Opcode();
+ //if( vop == Op_Phi ) { // Phi from another merge point might be OK
+ // Node *r = v->in(0); // Get controlling point
+ // if( !r ) return NULL; // Degraded to a copy
+ // // Find exactly one path in (either True or False doms, but not IFF)
+ // int cnt = 0;
+ // for( uint i = 1; i < r->req(); i++ )
+ // if( r->in(i) && r->in(i)->in(0) == iff )
+ // cnt++;
+ // if( cnt == 1 ) continue; // Exactly one of True or False guards Phi
+ //}
+ if( !v->is_Call() ) {
+ /*
+ if( v->Opcode() == Op_AddP ) {
+ tty->print_cr("Phi has AddP use");
+ } else if( v->Opcode() == Op_CastPP ) {
+ tty->print_cr("Phi has CastPP use");
+ } else if( v->Opcode() == Op_CastII ) {
+ tty->print_cr("Phi has CastII use");
+ } else {
+ tty->print_cr("Phi has use I cant be bothered with");
+ }
+ */
+ }
+ return NULL;
+
+ /* CNC - Cut out all the fancy acceptance tests
+ // Can we clone this use when doing the transformation?
+ // If all uses are from Phis at this merge or constants, then YES.
+ if( !v->in(0) && v != cmp ) {
+ tty->print_cr("Phi has free-floating use");
+ v->dump(2);
+ return NULL;
+ }
+ for( uint l = 1; l < v->req(); l++ ) {
+ if( (!v->in(l)->is_Phi() || v->in(l)->in(0) != r) &&
+ !v->in(l)->is_Con() ) {
+ tty->print_cr("Phi has use");
+ v->dump(2);
+ return NULL;
+ } // End of if Phi-use input is neither Phi nor Constant
+ } // End of for all inputs to Phi-use
+ */
+ } // End of for all uses of Phi
+ } // End of for all uses of Region
+
+ // Only do this if the IF node is in a sane state
+ if (iff->outcnt() != 2)
+ return NULL;
+
+ // Got a hit! Do the Mondo Hack!
+ //
+ //ABC a1c def ghi B 1 e h A C a c d f g i
+ // R - Phi - Phi - Phi Rc - Phi - Phi - Phi Rx - Phi - Phi - Phi
+ // cmp - 2 cmp - 2 cmp - 2
+ // bool bool_c bool_x
+ // if if_c if_x
+ // T F T F T F
+ // ..s.. ..t .. ..s.. ..t.. ..s.. ..t..
+ //
+ // Split the paths coming into the merge point into 2 separate groups of
+ // merges. On the left will be all the paths feeding constants into the
+ // Cmp's Phi. On the right will be the remaining paths. The Cmp's Phi
+ // will fold up into a constant; this will let the Cmp fold up as well as
+ // all the control flow. Below the original IF we have 2 control
+ // dependent regions, 's' and 't'. Now we will merge the two paths
+ // just prior to 's' and 't' from the two IFs. At least 1 path (and quite
+ // likely 2 or more) will promptly constant fold away.
+ PhaseGVN *phase = igvn;
+
+ // Make a region merging constants and a region merging the rest
+ uint req_c = 0;
+ for (uint ii = 1; ii < r->req(); ii++) {
+ if( phi->in(ii) == con1 ) {
+ req_c++;
+ }
+ }
+ Node *region_c = new (igvn->C, req_c + 1) RegionNode(req_c + 1);
+ Node *phi_c = con1;
+ uint len = r->req();
+ Node *region_x = new (igvn->C, len - req_c + 1) RegionNode(len - req_c + 1);
+ Node *phi_x = PhiNode::make_blank(region_x, phi);
+ for (uint i = 1, i_c = 1, i_x = 1; i < len; i++) {
+ if( phi->in(i) == con1 ) {
+ region_c->init_req( i_c++, r ->in(i) );
+ } else {
+ region_x->init_req( i_x, r ->in(i) );
+ phi_x ->init_req( i_x++, phi->in(i) );
+ }
+ }
+
+ // Register the new RegionNodes but do not transform them. Cannot
+ // transform until the entire Region/Phi conglomerate has been hacked
+ // as a single huge transform.
+ igvn->register_new_node_with_optimizer( region_c );
+ igvn->register_new_node_with_optimizer( region_x );
+ phi_x = phase->transform( phi_x );
+ // Prevent the untimely death of phi_x. Currently he has no uses. He is
+ // about to get one. If that one use goes away, then phi_x will look dead.
+ // However, he will be picking up some more uses down below.
+ Node *hook = new (igvn->C, 4) Node(4);
+ hook->init_req(0, phi_x);
+ hook->init_req(1, phi_c);
+
+ // Make the compare
+ Node *cmp_c = phase->makecon(t);
+ Node *cmp_x = cmp->clone();
+ cmp_x->set_req(1,phi_x);
+ cmp_x->set_req(2,con2);
+ cmp_x = phase->transform(cmp_x);
+ // Make the bool
+ Node *b_c = phase->transform(new (igvn->C, 2) BoolNode(cmp_c,b->_test._test));
+ Node *b_x = phase->transform(new (igvn->C, 2) BoolNode(cmp_x,b->_test._test));
+ // Make the IfNode
+ IfNode *iff_c = new (igvn->C, 2) IfNode(region_c,b_c,iff->_prob,iff->_fcnt);
+ igvn->set_type_bottom(iff_c);
+ igvn->_worklist.push(iff_c);
+ hook->init_req(2, iff_c);
+
+ IfNode *iff_x = new (igvn->C, 2) IfNode(region_x,b_x,iff->_prob, iff->_fcnt);
+ igvn->set_type_bottom(iff_x);
+ igvn->_worklist.push(iff_x);
+ hook->init_req(3, iff_x);
+
+ // Make the true/false arms
+ Node *iff_c_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_c));
+ Node *iff_c_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_c));
+ Node *iff_x_t = phase->transform(new (igvn->C, 1) IfTrueNode (iff_x));
+ Node *iff_x_f = phase->transform(new (igvn->C, 1) IfFalseNode(iff_x));
+
+ // Merge the TRUE paths
+ Node *region_s = new (igvn->C, 3) RegionNode(3);
+ igvn->_worklist.push(region_s);
+ region_s->init_req(1, iff_c_t);
+ region_s->init_req(2, iff_x_t);
+ igvn->register_new_node_with_optimizer( region_s );
+
+ // Merge the FALSE paths
+ Node *region_f = new (igvn->C, 3) RegionNode(3);
+ igvn->_worklist.push(region_f);
+ region_f->init_req(1, iff_c_f);
+ region_f->init_req(2, iff_x_f);
+ igvn->register_new_node_with_optimizer( region_f );
+
+ igvn->hash_delete(cmp);// Remove soon-to-be-dead node from hash table.
+ cmp->set_req(1,NULL); // Whack the inputs to cmp because it will be dead
+ cmp->set_req(2,NULL);
+ // Check for all uses of the Phi and give them a new home.
+ // The 'cmp' got cloned, but CastPP/IIs need to be moved.
+ Node *phi_s = NULL; // do not construct unless needed
+ Node *phi_f = NULL; // do not construct unless needed
+ for (DUIterator_Last i2min, i2 = phi->last_outs(i2min); i2 >= i2min; --i2) {
+ Node* v = phi->last_out(i2);// User of the phi
+ igvn->hash_delete(v); // Have to fixup other Phi users
+ igvn->_worklist.push(v);
+ uint vop = v->Opcode();
+ Node *proj = NULL;
+ if( vop == Op_Phi ) { // Remote merge point
+ Node *r = v->in(0);
+ for (uint i3 = 1; i3 < r->req(); i3++)
+ if (r->in(i3) && r->in(i3)->in(0) == iff) {
+ proj = r->in(i3);
+ break;
+ }
+ } else if( v->is_ConstraintCast() ) {
+ proj = v->in(0); // Controlling projection
+ } else {
+ assert( 0, "do not know how to handle this guy" );
+ }
+
+ Node *proj_path_data, *proj_path_ctrl;
+ if( proj->Opcode() == Op_IfTrue ) {
+ if( phi_s == NULL ) {
+ // Only construct phi_s if needed; otherwise it provides an
+ // interfering use.
+ phi_s = PhiNode::make_blank(region_s,phi);
+ phi_s->init_req( 1, phi_c );
+ phi_s->init_req( 2, phi_x );
+ phi_s = phase->transform(phi_s);
+ }
+ proj_path_data = phi_s;
+ proj_path_ctrl = region_s;
+ } else {
+ if( phi_f == NULL ) {
+ // Only construct phi_f if needed; otherwise it provides an
+ // interfering use.
+ phi_f = PhiNode::make_blank(region_f,phi);
+ phi_f->init_req( 1, phi_c );
+ phi_f->init_req( 2, phi_x );
+ phi_f = phase->transform(phi_f);
+ }
+ proj_path_data = phi_f;
+ proj_path_ctrl = region_f;
+ }
+
+ // Fixup 'v' for the split
+ if( vop == Op_Phi ) { // Remote merge point
+ uint i;
+ for( i = 1; i < v->req(); i++ )
+ if( v->in(i) == phi )
+ break;
+ v->set_req(i, proj_path_data );
+ } else if( v->is_ConstraintCast() ) {
+ v->set_req(0, proj_path_ctrl );
+ v->set_req(1, proj_path_data );
+ } else
+ ShouldNotReachHere();
+ }
+
+ // Now replace the original iff's True/False with region_s/region_f.
+ // This makes the original iff go dead.
+ for (DUIterator_Last i3min, i3 = iff->last_outs(i3min); i3 >= i3min; --i3) {
+ Node* p = iff->last_out(i3);
+ assert( p->Opcode() == Op_IfTrue || p->Opcode() == Op_IfFalse, "" );
+ Node *u = (p->Opcode() == Op_IfTrue) ? region_s : region_f;
+ // Replace p with u
+ igvn->add_users_to_worklist(p);
+ for (DUIterator_Last lmin, l = p->last_outs(lmin); l >= lmin;) {
+ Node* x = p->last_out(l);
+ igvn->hash_delete(x);
+ uint uses_found = 0;
+ for( uint j = 0; j < x->req(); j++ ) {
+ if( x->in(j) == p ) {
+ x->set_req(j, u);
+ uses_found++;
+ }
+ }
+ l -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ igvn->remove_dead_node(p);
+ }
+
+ // Force the original merge dead
+ igvn->hash_delete(r);
+ r->set_req_X(0,NULL,igvn);
+
+ // Now remove the bogus extra edges used to keep things alive
+ igvn->remove_dead_node( hook );
+
+ // Must return either the original node (now dead) or a new node
+ // (Do not return a top here, since that would break the uniqueness of top.)
+ return new (igvn->C, 1) ConINode(TypeInt::ZERO);
+}
+
+//------------------------------is_range_check---------------------------------
+// Return 0 if not a range check. Return 1 if a range check and set index and
+// offset. Return 2 if we had to negate the test. Index is NULL if the check
+// is versus a constant.
+int IfNode::is_range_check(Node* &range, Node* &index, jint &offset) {
+ Node* b = in(1);
+ if (b == NULL || !b->is_Bool()) return 0;
+ BoolNode* bn = b->as_Bool();
+ Node* cmp = bn->in(1);
+ if (cmp == NULL) return 0;
+ if (cmp->Opcode() != Op_CmpU) return 0;
+
+ Node* l = cmp->in(1);
+ Node* r = cmp->in(2);
+ int flip_test = 1;
+ if (bn->_test._test == BoolTest::le) {
+ l = cmp->in(2);
+ r = cmp->in(1);
+ flip_test = 2;
+ } else if (bn->_test._test != BoolTest::lt) {
+ return 0;
+ }
+ if (l->is_top()) return 0; // Top input means dead test
+ if (r->Opcode() != Op_LoadRange) return 0;
+
+ // We have recognized one of these forms:
+ // Flip 1: If (Bool[<] CmpU(l, LoadRange)) ...
+ // Flip 2: If (Bool[<=] CmpU(LoadRange, l)) ...
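+ //
+ // Illustrative example (not from this code): a Java array access a[i]
+ // typically reaches here as
+ // If (Bool[<] CmpU(i, LoadRange(a)))
+ // with the out-of-bounds arm ending in a Reason_range_check uncommon
+ // trap, which is exactly what the code below verifies.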
+
+ // Make sure it's a real range check by requiring an uncommon trap
+ // along the OOB path. Otherwise, it's possible that the user wrote
+ // something which optimized to look like a range check but behaves
+ // in some other way.
+ Node* iftrap = proj_out(flip_test == 2 ? true : false);
+ bool found_trap = false;
+ if (iftrap != NULL) {
+ Node* u = iftrap->unique_ctrl_out();
+ if (u != NULL) {
+ // It could be a merge point (Region) for uncommon trap.
+ if (u->is_Region()) {
+ Node* c = u->unique_ctrl_out();
+ if (c != NULL) {
+ iftrap = u;
+ u = c;
+ }
+ }
+ if (u->in(0) == iftrap && u->is_CallStaticJava()) {
+ int req = u->as_CallStaticJava()->uncommon_trap_request();
+ if (Deoptimization::trap_request_reason(req) ==
+ Deoptimization::Reason_range_check) {
+ found_trap = true;
+ }
+ }
+ }
+ }
+ if (!found_trap) return 0; // sorry, no cigar
+
+ // Look for index+offset form
+ Node* ind = l;
+ jint off = 0;
+ if (l->is_top()) {
+ return 0;
+ } else if (l->is_Add()) {
+ if ((off = l->in(1)->find_int_con(0)) != 0) {
+ ind = l->in(2);
+ } else if ((off = l->in(2)->find_int_con(0)) != 0) {
+ ind = l->in(1);
+ }
+ } else if ((off = l->find_int_con(-1)) >= 0) {
+ // constant offset with no variable index
+ ind = NULL;
+ } else {
+ // variable index with no constant offset (or dead negative index)
+ off = 0;
+ }
+
+ // Return all the values:
+ index = ind;
+ offset = off;
+ range = r;
+ return flip_test;
+}
+
+//------------------------------adjust_check-----------------------------------
+// Adjust (widen) a prior range check
+static void adjust_check(Node* proj, Node* range, Node* index,
+ int flip, jint off_lo, PhaseIterGVN* igvn) {
+ PhaseGVN *gvn = igvn;
+ // Break apart the old check
+ Node *iff = proj->in(0);
+ Node *bol = iff->in(1);
+ if( bol->is_top() ) return; // In case a partially dead range check appears
+ // bail (or bomb[ASSERT/DEBUG]) if NOT projection-->IfNode-->BoolNode
+ DEBUG_ONLY( if( !bol->is_Bool() ) { proj->dump(3); fatal("Expect projection-->IfNode-->BoolNode"); } )
+ if( !bol->is_Bool() ) return;
+
+ Node *cmp = bol->in(1);
+ // Compute a new check
+ Node *new_add = gvn->intcon(off_lo);
+ if( index ) {
+ new_add = off_lo ? gvn->transform(new (gvn->C, 3) AddINode( index, new_add )) : index;
+ }
+ Node *new_cmp = (flip == 1)
+ ? new (gvn->C, 3) CmpUNode( new_add, range )
+ : new (gvn->C, 3) CmpUNode( range, new_add );
+ new_cmp = gvn->transform(new_cmp);
+ // See if no need to adjust the existing check
+ if( new_cmp == cmp ) return;
+ // Else, adjust existing check
+ Node *new_bol = gvn->transform( new (gvn->C, 2) BoolNode( new_cmp, bol->as_Bool()->_test._test ) );
+ igvn->hash_delete( iff );
+ iff->set_req_X( 1, new_bol, igvn );
+}
+
+//------------------------------up_one_dom-------------------------------------
+// Walk up the dominator tree one step. Return NULL at root or true
+// complex merges. Skips through small diamonds.
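+// A "small diamond" (illustrative): an If whose IfTrue and IfFalse
+// projections both feed the same 2-input Region; from such a Region the
+// walk steps directly to the If, possibly skipping a slow-path Call on
+// either arm.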
+Node* IfNode::up_one_dom(Node *curr, bool linear_only) {
+ Node *dom = curr->in(0);
+ if( !dom ) // Found a Region degraded to a copy?
+ return curr->nonnull_req(); // Skip thru it
+
+ if( curr != dom ) // Normal walk up one step?
+ return dom;
+
+ // Use linear_only if we are still parsing, since we cannot
+ // trust the regions to be fully filled in.
+ if (linear_only)
+ return NULL;
+
+ // Else hit a Region. Check for a loop header
+ if( dom->is_Loop() )
+ return dom->in(1); // Skip up thru loops
+
+ // Check for small diamonds
+ Node *din1, *din2, *din3, *din4;
+ if( dom->req() == 3 && // 2-path merge point
+ (din1 = dom ->in(1)) && // Left path exists
+ (din2 = dom ->in(2)) && // Right path exists
+ (din3 = din1->in(0)) && // Left path up one
+ (din4 = din2->in(0)) ) { // Right path up one
+ if( din3->is_Call() && // Handle a slow-path call on either arm
+ (din3 = din3->in(0)) )
+ din3 = din3->in(0);
+ if( din4->is_Call() && // Handle a slow-path call on either arm
+ (din4 = din4->in(0)) )
+ din4 = din4->in(0);
+ if( din3 == din4 && din3->is_If() )
+ return din3; // Skip around diamonds
+ }
+
+ // Give up the search at true merges
+ return NULL; // Dead loop? Or hit root?
+}
+
+//------------------------------remove_useless_bool----------------------------
+// Check for people making a useless boolean: things like
+// if( (x < y ? true : false) ) { ... }
+// Replace with if( x < y ) { ... }
+static Node *remove_useless_bool(IfNode *iff, PhaseGVN *phase) {
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) return NULL;
+ BoolNode *bol = i1->as_Bool();
+
+ Node *cmp = bol->in(1);
+ if( cmp->Opcode() != Op_CmpI ) return NULL;
+
+ // Must be comparing against a bool
+ const Type *cmp2_t = phase->type( cmp->in(2) );
+ if( cmp2_t != TypeInt::ZERO &&
+ cmp2_t != TypeInt::ONE )
+ return NULL;
+
+ // Find a prior merge point merging the boolean
+ i1 = cmp->in(1);
+ if( !i1->is_Phi() ) return NULL;
+ PhiNode *phi = i1->as_Phi();
+ if( phase->type( phi ) != TypeInt::BOOL )
+ return NULL;
+
+ // Check for diamond pattern
+ int true_path = phi->is_diamond_phi();
+ if( true_path == 0 ) return NULL;
+
+ // phi->region->if_proj->ifnode->bool->cmp
+ BoolNode *bol2 = phi->in(0)->in(1)->in(0)->in(1)->as_Bool();
+
+ // Now get the 'sense' of the test correct so we can plug in
+ // either iff2->in(1) or its complement.
+ int flip = 0;
+ if( bol->_test._test == BoolTest::ne ) flip = 1-flip;
+ else if( bol->_test._test != BoolTest::eq ) return NULL;
+ if( cmp2_t == TypeInt::ZERO ) flip = 1-flip;
+
+ const Type *phi1_t = phase->type( phi->in(1) );
+ const Type *phi2_t = phase->type( phi->in(2) );
+ // Check for Phi(0,1) and flip
+ if( phi1_t == TypeInt::ZERO ) {
+ if( phi2_t != TypeInt::ONE ) return NULL;
+ flip = 1-flip;
+ } else {
+ // Check for Phi(1,0)
+ if( phi1_t != TypeInt::ONE ) return NULL;
+ if( phi2_t != TypeInt::ZERO ) return NULL;
+ }
+ if( true_path == 2 ) {
+ flip = 1-flip;
+ }
+
+ Node* new_bol = (flip ? phase->transform( bol2->negate(phase) ) : bol2);
+ iff->set_req(1, new_bol);
+ // Intervening diamond probably goes dead
+ phase->C->set_major_progress();
+ return iff;
+}
+
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff);
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *IfNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+ // No Def-Use info?
+ if (!can_reshape) return NULL;
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+
+ // Don't bother trying to transform a dead if
+ if (in(0)->is_top()) return NULL;
+ // Don't bother trying to transform an if with a dead test
+ if (in(1)->is_top()) return NULL;
+ // Another variation of a dead test
+ if (in(1)->is_Con()) return NULL;
+ // Another variation of a dead if
+ if (outcnt() < 2) return NULL;
+
+ // Canonicalize the test.
+ Node* idt_if = idealize_test(phase, this);
+ if (idt_if != NULL) return idt_if;
+
+ // Try to split the IF
+ Node *s = split_if(this, igvn);
+ if (s != NULL) return s;
+
+ // Check for people making a useless boolean: things like
+ // if( (x < y ? true : false) ) { ... }
+ // Replace with if( x < y ) { ... }
+ Node *bol2 = remove_useless_bool(this, phase);
+ if( bol2 ) return bol2;
+
+ // Setup to scan up the CFG looking for a dominating test
+ Node *dom = in(0);
+ Node *prev_dom = this;
+
+ // Check for range-check vs other kinds of tests
+ Node *index1, *range1;
+ jint offset1;
+ int flip1 = is_range_check(range1, index1, offset1);
+ if( flip1 ) {
+ Node *first_prev_dom = NULL;
+
+ // Try to remove extra range checks. Since 'up_one_dom' gives up at merges,
+ // all checks we inspect post-dominate the top-most check we find.
+ // If we are going to fail the current check and we reach the top check
+ // then we are guaranteed to fail, so just start interpreting there.
+ // We 'expand' the top 2 range checks to include all post-dominating
+ // checks.
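+ //
+ // Illustrative example (not from this code): if dominating checks exist
+ // for a[i+1] and a[i+3] and the current check is for a[i+2], then
+ // off_lo == 1 and off_hi == 3; widening the two dominating checks to
+ // cover offsets 1 and 3 (roughly) makes the i+2 check redundant, so it
+ // can be dominated away.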
+
+ // The top 2 range checks seen
+ Node *prev_chk1 = NULL;
+ Node *prev_chk2 = NULL;
+ // Low and high offsets seen so far
+ jint off_lo = offset1;
+ jint off_hi = offset1;
+
+ // Scan for the top 2 checks and collect range of offsets
+ for( int dist = 0; dist < 999; dist++ ) { // Range-Check scan limit
+ if( dom->Opcode() == Op_If && // Is it an If?
+ prev_dom->in(0) == dom ) { // One path of test does dominate?
+ if( dom == this ) return NULL; // dead loop
+ // See if this is a range check
+ Node *index2, *range2;
+ jint offset2;
+ int flip2 = dom->as_If()->is_range_check(range2, index2, offset2);
+ // See if this is a _matching_ range check, checking against
+ // the same array bounds.
+ if( flip2 == flip1 && range2 == range1 && index2 == index1 &&
+ dom->outcnt() == 2 ) {
+ // Gather expanded bounds
+ off_lo = MIN2(off_lo,offset2);
+ off_hi = MAX2(off_hi,offset2);
+ // Record top 2 range checks
+ prev_chk2 = prev_chk1;
+ prev_chk1 = prev_dom;
+ // If we match the test exactly, then the top test covers
+ // both our lower and upper bounds.
+ if( dom->in(1) == in(1) )
+ prev_chk2 = prev_chk1;
+ }
+ }
+ prev_dom = dom;
+ dom = up_one_dom( dom );
+ if( !dom ) break;
+ }
+
+
+ // Attempt to widen the dominating range check to cover some later
+ // ones. Since range checks "fail" by uncommon-trapping to the
+ // interpreter, widening a check can make us speculatively enter the
+ // interpreter. If we see range-check deopts, do not widen!
+ if (!phase->C->allow_range_check_smearing()) return NULL;
+
+ // Constant indices only need to check the upper bound.
+ // Non-constant indices must check both low and high.
+ if( index1 ) {
+ // Didn't find 2 prior covering checks, so cannot remove anything.
+ if( !prev_chk2 ) return NULL;
+ // 'Widen' the offsets of the 1st and 2nd covering check
+ adjust_check( prev_chk1, range1, index1, flip1, off_lo, igvn );
+ // Do not call adjust_check twice on the same projection
+ // as the first call may have transformed the BoolNode to a ConI
+ if( prev_chk1 != prev_chk2 ) {
+ adjust_check( prev_chk2, range1, index1, flip1, off_hi, igvn );
+ }
+ // Test is now covered by prior checks, dominate it out
+ prev_dom = prev_chk2;
+ } else {
+ // Didn't find prior covering check, so cannot remove anything.
+ if( !prev_chk1 ) return NULL;
+ // 'Widen' the offset of the 1st and only covering check
+ adjust_check( prev_chk1, range1, index1, flip1, off_hi, igvn );
+ // Test is now covered by prior checks, dominate it out
+ prev_dom = prev_chk1;
+ }
+
+
+ } else { // Scan for an equivalent test
+
+ Node *cmp;
+ int dist = 0; // Cutoff limit for search
+ int op = Opcode();
+ if( op == Op_If &&
+ (cmp=in(1)->in(1))->Opcode() == Op_CmpP ) {
+ if( cmp->in(2) != NULL && // make sure cmp is not already dead
+ cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+ dist = 64; // Limit for null-pointer scans
+ } else {
+ dist = 4; // Do not bother for random pointer tests
+ }
+ } else {
+ dist = 4; // Limit for random junky scans
+ }
+
+ // Normal equivalent-test check.
+ if( !dom ) return NULL; // Dead loop?
+
+ // Search up the dominator tree for an If with an identical test
+ while( dom->Opcode() != op || // Not same opcode?
+ dom->in(1) != in(1) || // Not same input 1?
+ (req() == 3 && dom->in(2) != in(2)) || // Not same input 2?
+ prev_dom->in(0) != dom ) { // One path of test does not dominate?
+ if( dist < 0 ) return NULL;
+
+ dist--;
+ prev_dom = dom;
+ dom = up_one_dom( dom );
+ if( !dom ) return NULL;
+ }
+
+ // Check that we did not follow a loop back to ourselves
+ if( this == dom )
+ return NULL;
+
+ if( dist > 2 ) // Add to count of NULL checks elided
+ explicit_null_checks_elided++;
+
+ } // End of Else scan for an equivalent test
+
+ // Hit! Remove this IF
+#ifndef PRODUCT
+ if( TraceIterativeGVN ) {
+ tty->print(" Removing IfNode: "); this->dump();
+ }
+ if( VerifyOpto && !phase->allow_progress() ) {
+ // Found an equivalent dominating test,
+ // we cannot guarantee reaching a fix-point for these during iterative GVN
+ // since intervening nodes may not change.
+ return NULL;
+ }
+#endif
+
+ // Replace dominated IfNode
+ dominated_by( prev_dom, igvn );
+
+ // Must return either the original node (now dead) or a new node
+ // (Do not return a top here, since that would break the uniqueness of top.)
+ return new (phase->C, 1) ConINode(TypeInt::ZERO);
+}
+
+//------------------------------dominated_by-----------------------------------
+void IfNode::dominated_by( Node *prev_dom, PhaseIterGVN *igvn ) {
+ igvn->hash_delete(this); // Remove self to prevent spurious V-N
+ Node *idom = in(0);
+ // Need opcode to decide which way 'this' test goes
+ int prev_op = prev_dom->Opcode();
+ Node *top = igvn->C->top(); // Shortcut to top
+
+ // Now walk the current IfNode's projections.
+ // Loop ends when 'this' has no more uses.
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; --i) {
+ Node *ifp = last_out(i); // Get IfTrue/IfFalse
+ igvn->add_users_to_worklist(ifp);
+ // Check which projection it is and set target.
+ // Data-target is either the dominating projection of the same type
+ // or TOP if the dominating projection is of opposite type.
+ // Data-target will be used as the new control edge for the non-CFG
+ // nodes like Casts and Loads.
+ Node *data_target = (ifp->Opcode() == prev_op ) ? prev_dom : top;
+ // Control-target is just the If's immediate dominator or TOP.
+ Node *ctrl_target = (ifp->Opcode() == prev_op ) ? idom : top;
+
+ // For each child of an IfTrue/IfFalse projection, reroute.
+ // Loop ends when projection has no more uses.
+ for (DUIterator_Last jmin, j = ifp->last_outs(jmin); j >= jmin; --j) {
+ Node* s = ifp->last_out(j); // Get child of IfTrue/IfFalse
+ igvn->hash_delete(s); // Yank from hash table before edge hacking
+ if( !s->depends_only_on_test() ) {
+ // Find the control input matching this def-use edge.
+ // For Regions it may not be in slot 0.
+ uint l;
+ for( l = 0; s->in(l) != ifp; l++ ) { }
+ s->set_req(l, ctrl_target);
+ } else { // Else, for control producers,
+ s->set_req(0, data_target); // Move child to data-target
+ }
+ igvn->_worklist.push(s); // Revisit collapsed Phis
+ } // End for each child of a projection
+
+ igvn->remove_dead_node(ifp);
+ } // End for each IfTrue/IfFalse child of If
+
+ // Kill the IfNode
+ igvn->remove_dead_node(this);
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfTrueNode::Identity( PhaseTransform *phase ) {
+ // Can only optimize if cannot go the other way
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFTRUE )
+ ? in(0)->in(0) // IfNode control
+ : this; // no progress
+}
+
+//------------------------------dump_spec--------------------------------------
+#ifndef PRODUCT
+void IfNode::dump_spec(outputStream *st) const {
+ st->print("P=%f, C=%f",_prob,_fcnt);
+}
+#endif
+
+//------------------------------idealize_test----------------------------------
+// Try to canonicalize tests better. Peek at the Cmp/Bool/If sequence and
+// come up with a canonical sequence. Bools with 'eq', 'gt' and 'ge' forms
+// get converted to 'ne', 'le' and 'lt' forms. IfTrue/IfFalse get swapped as
+// needed.
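+//
+// For example (illustrative): an If testing Bool[eq] is rebuilt as an If
+// testing Bool[ne] with probability 1 - _prob, and users of the old IfTrue
+// projection are rerouted to the new IfFalse projection (and vice versa),
+// so overall behavior is unchanged.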
+static IfNode* idealize_test(PhaseGVN* phase, IfNode* iff) {
+ assert(iff->in(0) != NULL, "If must be live");
+
+ if (iff->outcnt() != 2) return NULL; // Malformed projections.
+ Node* old_if_f = iff->proj_out(false);
+ Node* old_if_t = iff->proj_out(true);
+
+ // CountedLoopEnds want the back-control test to be TRUE, regardless of
+ // whether they are testing a 'gt' or 'lt' condition. The 'gt' condition
+ // happens in count-down loops
+ if (iff->is_CountedLoopEnd()) return NULL;
+ if (!iff->in(1)->is_Bool()) return NULL; // Happens for partially optimized IF tests
+ BoolNode *b = iff->in(1)->as_Bool();
+ BoolTest bt = b->_test;
+ // Test already in good order?
+ if( bt.is_canonical() )
+ return NULL;
+
+ // Flip test to be canonical. Requires flipping the IfFalse/IfTrue and
+ // cloning the IfNode.
+ Node* new_b = phase->transform( new (phase->C, 2) BoolNode(b->in(1), bt.negate()) );
+ if( !new_b->is_Bool() ) return NULL;
+ b = new_b->as_Bool();
+
+ PhaseIterGVN *igvn = phase->is_IterGVN();
+ assert( igvn, "Test is not canonical in parser?" );
+
+ // The IF node never really changes, but it needs to be cloned
+ iff = new (phase->C, 2) IfNode( iff->in(0), b, 1.0-iff->_prob, iff->_fcnt);
+
+ Node *prior = igvn->hash_find_insert(iff);
+ if( prior ) {
+ igvn->remove_dead_node(iff);
+ iff = (IfNode*)prior;
+ } else {
+ // Cannot call transform on it just yet
+ igvn->set_type_bottom(iff);
+ }
+ igvn->_worklist.push(iff);
+
+ // Now handle projections. Cloning not required.
+ Node* new_if_f = (Node*)(new (phase->C, 1) IfFalseNode( iff ));
+ Node* new_if_t = (Node*)(new (phase->C, 1) IfTrueNode ( iff ));
+
+ igvn->register_new_node_with_optimizer(new_if_f);
+ igvn->register_new_node_with_optimizer(new_if_t);
+ igvn->hash_delete(old_if_f);
+ igvn->hash_delete(old_if_t);
+ // Flip test, so flip trailing control
+ igvn->subsume_node(old_if_f, new_if_t);
+ igvn->subsume_node(old_if_t, new_if_f);
+
+ // Progress
+ return iff;
+}
+
+//------------------------------Identity---------------------------------------
+// If the test is constant & we match, then we are the input Control
+Node *IfFalseNode::Identity( PhaseTransform *phase ) {
+ // Can only optimize if cannot go the other way
+ const TypeTuple *t = phase->type(in(0))->is_tuple();
+ return ( t == TypeTuple::IFNEITHER || t == TypeTuple::IFFALSE )
+ ? in(0)->in(0) // IfNode control
+ : this; // no progress
+}
diff --git a/src/share/vm/opto/indexSet.cpp b/src/share/vm/opto/indexSet.cpp
new file mode 100644
index 000000000..078315bee
--- /dev/null
+++ b/src/share/vm/opto/indexSet.cpp
@@ -0,0 +1,573 @@
+/*
+ * Copyright 1998-2004 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file defines the IndexSet class, a set of sparse integer indices.
+// This data structure is used by the compiler in its liveness analysis and
+// during register allocation. It also defines an iterator for this class.
+
+#include "incls/_precompiled.incl"
+#include "incls/_indexSet.cpp.incl"
+
+//-------------------------------- Initializations ------------------------------
+
+IndexSet::BitBlock IndexSet::_empty_block = IndexSet::BitBlock();
+
+#ifdef ASSERT
+// Initialize statistics counters
+uint IndexSet::_alloc_new = 0;
+uint IndexSet::_alloc_total = 0;
+
+long IndexSet::_total_bits = 0;
+long IndexSet::_total_used_blocks = 0;
+long IndexSet::_total_unused_blocks = 0;
+
+// Per set, or all sets operation tracing
+int IndexSet::_serial_count = 1;
+#endif
+
+// What is the first set bit in a 5 bit integer?
+const byte IndexSetIterator::_first_bit[32] = {
+ 0, 0, 1, 0,
+ 2, 0, 1, 0,
+ 3, 0, 1, 0,
+ 2, 0, 1, 0,
+ 4, 0, 1, 0,
+ 2, 0, 1, 0,
+ 3, 0, 1, 0,
+ 2, 0, 1, 0
+};
+
+// What is the second set bit in a 5 bit integer?
+const byte IndexSetIterator::_second_bit[32] = {
+ 5, 5, 5, 1,
+ 5, 2, 2, 1,
+ 5, 3, 3, 1,
+ 3, 2, 2, 1,
+ 5, 4, 4, 1,
+ 4, 2, 2, 1,
+ 4, 3, 3, 1,
+ 3, 2, 2, 1
+};
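+
+// Worked example (illustrative): for the 5 bit window value 0x0A (binary
+// 01010), _first_bit[0x0A] == 1 and _second_bit[0x0A] == 3: the lowest set
+// bit is bit 1 and the next set bit is bit 3. IndexSetIterator::next()
+// returns the element for bit 1 and shifts the window so the following
+// call resumes at bit 3.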
+
+// I tried implementing the IndexSetIterator with a window_size of 8 and
+// didn't seem to get a noticeable speedup. I am leaving in the tables
+// in case we want to switch back.
+
+/*const byte IndexSetIterator::_first_bit[256] = {
+ 8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
+ 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
+};
+
+const byte IndexSetIterator::_second_bit[256] = {
+ 8, 8, 8, 1, 8, 2, 2, 1, 8, 3, 3, 1, 3, 2, 2, 1,
+ 8, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 8, 7, 7, 1, 7, 2, 2, 1, 7, 3, 3, 1, 3, 2, 2, 1,
+ 7, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 7, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 7, 6, 6, 1, 6, 2, 2, 1, 6, 3, 3, 1, 3, 2, 2, 1,
+ 6, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1,
+ 6, 5, 5, 1, 5, 2, 2, 1, 5, 3, 3, 1, 3, 2, 2, 1,
+ 5, 4, 4, 1, 4, 2, 2, 1, 4, 3, 3, 1, 3, 2, 2, 1
+};*/
+
+//---------------------------- IndexSet::populate_free_list() -----------------------------
+// Populate the free BitBlock list with a batch of BitBlocks. The BitBlocks
+// are 32 byte aligned.
+
+void IndexSet::populate_free_list() {
+ Compile *compile = Compile::current();
+ BitBlock *free = (BitBlock*)compile->indexSet_free_block_list();
+
+ char *mem = (char*)arena()->Amalloc_4(sizeof(BitBlock) *
+ bitblock_alloc_chunk_size + 32);
+
+ // Align the pointer to a 32 byte boundary.
+ BitBlock *new_blocks = (BitBlock*)(((uintptr_t)mem + 32) & ~0x001F);
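+ // For example (illustrative): if mem ends in 0x21, then
+ // (0x21 + 32) & ~0x1F == 0x40, i.e. the next 32 byte boundary.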
+
+ // Add the new blocks to the free list.
+ for (int i = 0; i < bitblock_alloc_chunk_size; i++) {
+ new_blocks->set_next(free);
+ free = new_blocks;
+ new_blocks++;
+ }
+
+ compile->set_indexSet_free_block_list(free);
+
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ _alloc_new += bitblock_alloc_chunk_size;
+ }
+#endif
+}
+
+
+//---------------------------- IndexSet::alloc_block() ------------------------
+// Allocate a BitBlock from the free list. If the free list is empty,
+// prime it.
+
+IndexSet::BitBlock *IndexSet::alloc_block() {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ _alloc_total++;
+ }
+#endif
+ Compile *compile = Compile::current();
+ BitBlock* free_list = (BitBlock*)compile->indexSet_free_block_list();
+ if (free_list == NULL) {
+ populate_free_list();
+ free_list = (BitBlock*)compile->indexSet_free_block_list();
+ }
+ BitBlock *block = free_list;
+ compile->set_indexSet_free_block_list(block->next());
+
+ block->clear();
+ return block;
+}
+
+//---------------------------- IndexSet::alloc_block_containing() -------------
+// Allocate a new BitBlock and put it into the position in the _blocks array
+// corresponding to element.
+
+IndexSet::BitBlock *IndexSet::alloc_block_containing(uint element) {
+ BitBlock *block = alloc_block();
+ uint bi = get_block_index(element);
+ _blocks[bi] = block;
+ return block;
+}
+
+//---------------------------- IndexSet::free_block() -------------------------
+// Add a BitBlock to the free list.
+
+void IndexSet::free_block(uint i) {
+ debug_only(check_watch("free block", i));
+ assert(i < _max_blocks, "block index too large");
+ BitBlock *block = _blocks[i];
+ assert(block != &_empty_block, "cannot free the empty block");
+ block->set_next((IndexSet::BitBlock*)Compile::current()->indexSet_free_block_list());
+ Compile::current()->set_indexSet_free_block_list(block);
+ set_block(i,&_empty_block);
+}
+
+//------------------------------lrg_union--------------------------------------
+// Compute the union of all elements of one and two which interfere with
+// the RegMask mask. If the degree of the union exceeds
+// fail_degree, the union bails out. The underlying set is cleared before
+// the union is performed.
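+//
+// Hypothetical usage sketch (names here are illustrative, not from this
+// file): a coalescing pass could ask whether merging lr1 and lr2 stays
+// colorable:
+// if (tmp_set->lrg_union(lr1, lr2, max_colorable_degree, ifg, shared_mask)
+// >= max_colorable_degree) {
+// // combined degree too high -- reject the coalesce
+// }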
+
+uint IndexSet::lrg_union(uint lr1, uint lr2,
+ const uint fail_degree,
+ const PhaseIFG *ifg,
+ const RegMask &mask ) {
+ IndexSet *one = ifg->neighbors(lr1);
+ IndexSet *two = ifg->neighbors(lr2);
+ LRG &lrg1 = ifg->lrgs(lr1);
+ LRG &lrg2 = ifg->lrgs(lr2);
+#ifdef ASSERT
+ assert(_max_elements == one->_max_elements, "max element mismatch");
+ check_watch("union destination");
+ one->check_watch("union source");
+ two->check_watch("union source");
+#endif
+
+ // Compute the degree of the combined live-range. The combined
+ // live-range has the union of the original live-ranges' neighbors set as
+ // well as the neighbors of all intermediate copies, minus those neighbors
+ // that can not use the intersected allowed-register-set.
+
+ // Copy the larger set. Insert the smaller set into the larger.
+ if (two->count() > one->count()) {
+ IndexSet *temp = one;
+ one = two;
+ two = temp;
+ }
+
+ clear();
+
+ // Used to compute degree of register-only interferences. Infinite-stack
+ // neighbors do not alter colorability, as they can always color to some
+ // other color. (A variant of the Briggs assertion)
+ uint reg_degree = 0;
+
+ uint element;
+ // Load up the combined interference set with the neighbors of one
+ IndexSetIterator elements(one);
+ while ((element = elements.next()) != 0) {
+ LRG &lrg = ifg->lrgs(element);
+ if (mask.overlap(lrg.mask())) {
+ insert(element);
+ if( !lrg.mask().is_AllStack() ) {
+ reg_degree += lrg1.compute_degree(lrg);
+ if( reg_degree >= fail_degree ) return reg_degree;
+ } else {
+ // !!!!! Danger! No update to reg_degree despite having a neighbor.
+ // A variant of the Briggs assertion.
+ // Not needed if I simplify during coalesce, ala George/Appel.
+ assert( lrg.lo_degree(), "" );
+ }
+ }
+ }
+ // Add neighbors of two as well
+ IndexSetIterator elements2(two);
+ while ((element = elements2.next()) != 0) {
+ LRG &lrg = ifg->lrgs(element);
+ if (mask.overlap(lrg.mask())) {
+ if (insert(element)) {
+ if( !lrg.mask().is_AllStack() ) {
+ reg_degree += lrg2.compute_degree(lrg);
+ if( reg_degree >= fail_degree ) return reg_degree;
+ } else {
+ // !!!!! Danger! No update to reg_degree despite having a neighbor.
+ // A variant of the Briggs assertion.
+ // Not needed if I simplify during coalesce, ala George/Appel.
+ assert( lrg.lo_degree(), "" );
+ }
+ }
+ }
+ }
+
+ return reg_degree;
+}
+
+//---------------------------- IndexSet() -----------------------------
+// A deep copy constructor. This is used when you need a scratch copy of this set.
+
+IndexSet::IndexSet (IndexSet *set) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ set->check_watch("copied", _serial_number);
+ check_watch("initialized by copy", set->_serial_number);
+ _max_elements = set->_max_elements;
+#endif
+ _count = set->_count;
+ _max_blocks = set->_max_blocks;
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks =
+ (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *block = set->_blocks[i];
+ if (block == &_empty_block) {
+ set_block(i, &_empty_block);
+ } else {
+ BitBlock *new_block = alloc_block();
+ memcpy(new_block->words(), block->words(), sizeof(uint32) * words_per_block);
+ set_block(i, new_block);
+ }
+ }
+}
+
+//---------------------------- IndexSet::initialize() -----------------------------
+// Prepare an IndexSet for use.
+
+void IndexSet::initialize(uint max_elements) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ check_watch("initialized", max_elements);
+ _max_elements = max_elements;
+#endif
+ _count = 0;
+ _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
+
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks = (IndexSet::BitBlock**) arena()->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ set_block(i, &_empty_block);
+ }
+}
+
+//---------------------------- IndexSet::initialize()------------------------------
+// Prepare an IndexSet for use. If it needs to allocate its _blocks array, it does
+// so from the Arena passed as a parameter. BitBlock allocation is still done from
+// the static Arena which was set with reset_memory().
+
+void IndexSet::initialize(uint max_elements, Arena *arena) {
+#ifdef ASSERT
+ _serial_number = _serial_count++;
+ check_watch("initialized2", max_elements);
+ _max_elements = max_elements;
+#endif // ASSERT
+ _count = 0;
+ _max_blocks = (max_elements + bits_per_block - 1) / bits_per_block;
+
+ if (_max_blocks <= preallocated_block_list_size) {
+ _blocks = _preallocated_block_list;
+ } else {
+ _blocks = (IndexSet::BitBlock**) arena->Amalloc_4(sizeof(IndexSet::BitBlock**) * _max_blocks);
+ }
+ for (uint i = 0; i < _max_blocks; i++) {
+ set_block(i, &_empty_block);
+ }
+}
+
+//---------------------------- IndexSet::swap() -----------------------------
+// Exchange two IndexSets.
+
+void IndexSet::swap(IndexSet *set) {
+#ifdef ASSERT
+ assert(_max_elements == set->_max_elements, "must have same universe size to swap");
+ check_watch("swap", set->_serial_number);
+ set->check_watch("swap", _serial_number);
+#endif
+
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *temp = _blocks[i];
+ set_block(i, set->_blocks[i]);
+ set->set_block(i, temp);
+ }
+ uint temp = _count;
+ _count = set->_count;
+ set->_count = temp;
+}
+
+//---------------------------- IndexSet::dump() -----------------------------
+// Print this set. Used for debugging.
+
+#ifndef PRODUCT
+void IndexSet::dump() const {
+ IndexSetIterator elements(this);
+
+ tty->print("{");
+ uint i;
+ while ((i = elements.next()) != 0) {
+ tty->print("L%d ", i);
+ }
+ tty->print_cr("}");
+}
+#endif
+
+#ifdef ASSERT
+//---------------------------- IndexSet::tally_iteration_statistics() -----------------------------
+// Update block/bit counts to reflect that this set has been iterated over.
+
+void IndexSet::tally_iteration_statistics() const {
+ _total_bits += count();
+
+ for (uint i = 0; i < _max_blocks; i++) {
+ if (_blocks[i] != &_empty_block) {
+ _total_used_blocks++;
+ } else {
+ _total_unused_blocks++;
+ }
+ }
+}
+
+//---------------------------- IndexSet::print_statistics() -----------------------------
+// Print statistics about IndexSet usage.
+
+void IndexSet::print_statistics() {
+ long total_blocks = _total_used_blocks + _total_unused_blocks;
+ tty->print_cr ("Accumulated IndexSet usage statistics:");
+ tty->print_cr ("--------------------------------------");
+ tty->print_cr (" Iteration:");
+ tty->print_cr (" blocks visited: %d", total_blocks);
+ tty->print_cr (" blocks empty: %4.2f%%", 100.0*_total_unused_blocks/total_blocks);
+ tty->print_cr (" bit density (bits/used blocks): %4.2f%%", (double)_total_bits/_total_used_blocks);
+ tty->print_cr (" bit density (bits/all blocks): %4.2f%%", (double)_total_bits/total_blocks);
+ tty->print_cr (" Allocation:");
+ tty->print_cr (" blocks allocated: %d", _alloc_new);
+ tty->print_cr (" blocks used/reused: %d", _alloc_total);
+}
+
+//---------------------------- IndexSet::verify() -----------------------------
+// Expensive test of IndexSet sanity. Ensure that the count agrees with the
+// number of bits in the blocks. Make sure the iterator is seeing all elements
+// of the set. Meant for use during development.
+
+void IndexSet::verify() const {
+ assert(!member(0), "zero cannot be a member");
+ uint count = 0;
+ uint i;
+ for (i = 1; i < _max_elements; i++) {
+ if (member(i)) {
+ count++;
+ assert(count <= _count, "_count is messed up");
+ }
+ }
+
+ IndexSetIterator elements(this);
+ count = 0;
+ while ((i = elements.next()) != 0) {
+ count++;
+ assert(member(i), "returned a non member");
+ assert(count <= _count, "iterator returned wrong number of elements");
+ }
+}
+#endif
+
+//---------------------------- IndexSetIterator() -----------------------------
+// Create an iterator for a set. If empty blocks are detected when iterating
+// over the set, these blocks are replaced.
+
+IndexSetIterator::IndexSetIterator(IndexSet *set) {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ set->tally_iteration_statistics();
+ }
+ set->check_watch("traversed", set->count());
+#endif
+ if (set->is_empty()) {
+ _current = 0;
+ _next_word = IndexSet::words_per_block;
+ _next_block = 1;
+ _max_blocks = 1;
+
+ // We don't need the following values when we iterate over an empty set.
+ // The commented out code is left here to document that the omission
+ // is intentional.
+ //
+ //_value = 0;
+ //_words = NULL;
+ //_blocks = NULL;
+ //_set = NULL;
+ } else {
+ _current = 0;
+ _value = 0;
+ _next_block = 0;
+ _next_word = IndexSet::words_per_block;
+
+ _max_blocks = set->_max_blocks;
+ _words = NULL;
+ _blocks = set->_blocks;
+ _set = set;
+ }
+}
+
+//---------------------------- IndexSetIterator(const) -----------------------------
+// Iterate over a constant IndexSet.
+
+IndexSetIterator::IndexSetIterator(const IndexSet *set) {
+#ifdef ASSERT
+ if (CollectIndexSetStatistics) {
+ set->tally_iteration_statistics();
+ }
+ // We don't call check_watch from here to avoid bad recursion.
+ // set->check_watch("traversed const", set->count());
+#endif
+ if (set->is_empty()) {
+ _current = 0;
+ _next_word = IndexSet::words_per_block;
+ _next_block = 1;
+ _max_blocks = 1;
+
+ // We don't need the following values when we iterate over an empty set.
+ // The commented out code is left here to document that the omission
+ // is intentional.
+ //
+ //_value = 0;
+ //_words = NULL;
+ //_blocks = NULL;
+ //_set = NULL;
+ } else {
+ _current = 0;
+ _value = 0;
+ _next_block = 0;
+ _next_word = IndexSet::words_per_block;
+
+ _max_blocks = set->_max_blocks;
+ _words = NULL;
+ _blocks = set->_blocks;
+ _set = NULL;
+ }
+}
+
+//---------------------------- IndexSetIterator::advance_and_next() -----------------------------
+// Advance to the next non-empty word in the set being iterated over. Return the next element
+// if there is one. If we are done, return 0. This method is called from the next() method
+// when it gets done with a word.
+
+uint IndexSetIterator::advance_and_next() {
+ // See if there is another non-empty word in the current block.
+ for (uint wi = _next_word; wi < (unsigned)IndexSet::words_per_block; wi++) {
+ if (_words[wi] != 0) {
+ // Found a non-empty word.
+ _value = ((_next_block - 1) * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
+ _current = _words[wi];
+
+ _next_word = wi+1;
+
+ return next();
+ }
+ }
+
+ // We ran out of words in the current block. Advance to next non-empty block.
+ for (uint bi = _next_block; bi < _max_blocks; bi++) {
+ if (_blocks[bi] != &IndexSet::_empty_block) {
+ // Found a non-empty block.
+
+ _words = _blocks[bi]->words();
+ for (uint wi = 0; wi < (unsigned)IndexSet::words_per_block; wi++) {
+ if (_words[wi] != 0) {
+ // Found a non-empty word.
+ _value = (bi * IndexSet::bits_per_block) + (wi * IndexSet::bits_per_word);
+ _current = _words[wi];
+
+ _next_block = bi+1;
+ _next_word = wi+1;
+
+ return next();
+ }
+ }
+
+ // All of the words in the block were empty. Replace
+ // the block with the empty block.
+ if (_set) {
+ _set->free_block(bi);
+ }
+ }
+ }
+
+ // These assignments make redundant calls to next on a finished iterator
+ // faster. Probably not necessary.
+ _next_block = _max_blocks;
+ _next_word = IndexSet::words_per_block;
+
+ // No more words.
+ return 0;
+}
diff --git a/src/share/vm/opto/indexSet.hpp b/src/share/vm/opto/indexSet.hpp
new file mode 100644
index 000000000..de7de22aa
--- /dev/null
+++ b/src/share/vm/opto/indexSet.hpp
@@ -0,0 +1,461 @@
+/*
+ * Copyright 1998-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// This file defines the IndexSet class, a set of sparse integer indices.
+// This data structure is used by the compiler in its liveness analysis and
+// during register allocation.
+
+//-------------------------------- class IndexSet ----------------------------
+// An IndexSet is a piece-wise bitvector. At the top level, we have an array
+// of pointers to bitvector chunks called BitBlocks. Each BitBlock has a fixed
+// size and is allocated from a shared free list. The bits which are set in
+// each BitBlock correspond to the elements of the set.
+
+class IndexSet : public ResourceObj {
+ friend class IndexSetIterator;
+
+ public:
+ // When we allocate an IndexSet, it starts off with an array of top level block
+ // pointers of a set length. This size is intended to be large enough for the
+ // majority of IndexSets. In the cases when this size is not large enough,
+ // a separately allocated array is used.
+
+ // The length of the preallocated top level block array
+ enum { preallocated_block_list_size = 16 };
+
+ // Elements of an IndexSet get decomposed into three fields. The highest order
+ // bits are the block index, which tells which high level block holds the element.
+ // Within that block, the word index indicates which word holds the element.
+ // Finally, the bit index determines which single bit within that word indicates
+ // membership of the element in the set.
+
+ // The lengths of the index bitfields
+ enum { bit_index_length = 5,
+ word_index_length = 3,
+ block_index_length = 8 // not used
+ };
+
+ // Derived constants used for manipulating the index bitfields
+ enum {
+ bit_index_offset = 0, // not used
+ word_index_offset = bit_index_length,
+ block_index_offset = bit_index_length + word_index_length,
+
+ bits_per_word = 1 << bit_index_length,
+ words_per_block = 1 << word_index_length,
+ bits_per_block = bits_per_word * words_per_block,
+
+ bit_index_mask = right_n_bits(bit_index_length),
+ word_index_mask = right_n_bits(word_index_length)
+ };
+
+ // These routines are used for extracting the block, word, and bit index
+ // from an element.
+ static uint get_block_index(uint element) {
+ return element >> block_index_offset;
+ }
+ static uint get_word_index(uint element) {
+ return mask_bits(element >> word_index_offset,word_index_mask);
+ }
+ static uint get_bit_index(uint element) {
+ return mask_bits(element,bit_index_mask);
+ }
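+
+ // Worked example (illustrative): with the field widths above, a word holds
+ // 32 bits and a block holds 8 words == 256 bits. Element 300 therefore
+ // lives in block 300 >> 8 == 1, word (300 >> 5) & 7 == 1, bit 300 & 31 == 12.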
+
+ //------------------------------ class BitBlock ----------------------------
+ // The BitBlock class is a segment of a bitvector set.
+
+ class BitBlock : public ResourceObj {
+ friend class IndexSetIterator;
+ friend class IndexSet;
+
+ private:
+ // All of BitBlock's fields and methods are declared private. We limit
+ // access to IndexSet and IndexSetIterator.
+
+ // A BitBlock is composed of some number of 32 bit words. When a BitBlock
+ // is not in use by any IndexSet, it is stored on a free list. The next field
+ // is used by IndexSet to maintain this free list.
+
+ union {
+ uint32 _words[words_per_block];
+ BitBlock *_next;
+ } _data;
+
+ // accessors
+ uint32 *words() { return _data._words; }
+ void set_next(BitBlock *next) { _data._next = next; }
+ BitBlock *next() { return _data._next; }
+
+ // Operations. A BitBlock supports four simple operations,
+ // clear(), member(), insert(), and remove(). These methods do
+ // not assume that the block index has been masked out.
+
+ void clear() {
+ memset(words(), 0, sizeof(uint32) * words_per_block);
+ }
+
+ bool member(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ return ((words()[word_index] & (uint32)(0x1 << bit_index)) != 0);
+ }
+
+ bool insert(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ uint32 bit = (0x1 << bit_index);
+ uint32 before = words()[word_index];
+ words()[word_index] = before | bit;
+ return ((before & bit) != 0);
+ }
+
+ bool remove(uint element) {
+ uint word_index = IndexSet::get_word_index(element);
+ uint bit_index = IndexSet::get_bit_index(element);
+
+ uint32 bit = (0x1 << bit_index);
+ uint32 before = words()[word_index];
+ words()[word_index] = before & ~bit;
+ return ((before & bit) != 0);
+ }
+ };
+
+ //-------------------------- BitBlock allocation ---------------------------
+ private:
+
+ // All IndexSets share an arena from which they allocate BitBlocks. Unused
+ // BitBlocks are placed on a free list.
+
+ // The number of BitBlocks to allocate at a time
+ enum { bitblock_alloc_chunk_size = 50 };
+
+ static Arena *arena() { return Compile::current()->indexSet_arena(); }
+
+ static void populate_free_list();
+
+ public:
+
+ // Invalidate the current free BitBlock list and begin allocation
+ // from a new arena. It is essential that this method is called whenever
+ // the Arena being used for BitBlock allocation is reset.
+ static void reset_memory(Compile* compile, Arena *arena) {
+ compile->set_indexSet_free_block_list(NULL);
+ compile->set_indexSet_arena(arena);
+
+ // This should probably be done in a static initializer
+ _empty_block.clear();
+ }
+
+ private:
+ friend class BitBlock;
+ // A distinguished BitBlock which always remains empty. When a new IndexSet is
+ // created, all of its top level BitBlock pointers are initialized to point to
+ // this.
+ static BitBlock _empty_block;
+
+ //-------------------------- Members ------------------------------------------
+
+ // The number of elements in the set
+ uint _count;
+
+ // Our top level array of bitvector segments
+ BitBlock **_blocks;
+
+ BitBlock *_preallocated_block_list[preallocated_block_list_size];
+
+ // The number of top level array entries in use
+ uint _max_blocks;
+
+ // Our assertions need to know the maximum number allowed in the set
+#ifdef ASSERT
+ uint _max_elements;
+#endif
+
+ // The next IndexSet on the free list (not used at same time as count)
+ IndexSet *_next;
+
+ public:
+ //-------------------------- Free list operations ------------------------------
+ // Individual IndexSets can be placed on a free list. This is done in PhaseLive.
+
+ IndexSet *next() {
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ check_watch("removed from free list?", ((_next == NULL) ? 0 : _next->_serial_number));
+ }
+#endif
+ return _next;
+ }
+
+ void set_next(IndexSet *next) {
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ check_watch("put on free list?", ((next == NULL) ? 0 : next->_serial_number));
+ }
+#endif
+ _next = next;
+ }
+
+ private:
+ //-------------------------- Utility methods -----------------------------------
+
+ // Get the block which holds element
+ BitBlock *get_block_containing(uint element) const {
+ assert(element < _max_elements, "element out of bounds");
+ return _blocks[get_block_index(element)];
+ }
+
+ // Set a block in the top level array
+ void set_block(uint index, BitBlock *block) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("set block", index);
+#endif
+ _blocks[index] = block;
+ }
+
+ // Get a BitBlock from the free list
+ BitBlock *alloc_block();
+
+ // Get a BitBlock from the free list and place it in the top level array
+ BitBlock *alloc_block_containing(uint element);
+
+ // Free a block from the top level array, placing it on the free BitBlock list
+ void free_block(uint i);
+
+ public:
+ //-------------------------- Primitive set operations --------------------------
+
+ void clear() {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("clear");
+#endif
+ _count = 0;
+ for (uint i = 0; i < _max_blocks; i++) {
+ BitBlock *block = _blocks[i];
+ if (block != &_empty_block) {
+ free_block(i);
+ }
+ }
+ }
+
+ uint count() const { return _count; }
+
+ bool is_empty() const { return _count == 0; }
+
+ bool member(uint element) const {
+ return get_block_containing(element)->member(element);
+ }
+
+ bool insert(uint element) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("insert", element);
+#endif
+ if (element == 0) {
+ return 0;
+ }
+ BitBlock *block = get_block_containing(element);
+ if (block == &_empty_block) {
+ block = alloc_block_containing(element);
+ }
+ bool present = block->insert(element);
+ if (!present) {
+ _count++;
+ }
+ return !present;
+ }
+
+ bool remove(uint element) {
+#ifdef ASSERT
+ if( VerifyOpto )
+ check_watch("remove", element);
+#endif
+
+ BitBlock *block = get_block_containing(element);
+ bool present = block->remove(element);
+ if (present) {
+ _count--;
+ }
+ return present;
+ }
+
+ //-------------------------- Compound set operations ------------------------
+ // Compute the union of all elements of one and two which interfere
+ // with the RegMask mask. If the degree of the union exceeds
+ // fail_degree, the union bails out. The underlying set is
+ // cleared before the union is performed.
+ uint lrg_union(uint lr1, uint lr2,
+ const uint fail_degree,
+ const class PhaseIFG *ifg,
+ const RegMask &mask);
+
+
+ //------------------------- Construction, initialization -----------------------
+
+ IndexSet() {}
+
+ // This constructor is used for making a deep copy of an IndexSet.
+ IndexSet(IndexSet *set);
+
+ // Perform initialization on an IndexSet
+ void initialize(uint max_element);
+
+ // Initialize an IndexSet. If the top level BitBlock array needs to be
+ // allocated, do it from the proffered arena. BitBlocks are still allocated
+ // from the static Arena member.
+ void initialize(uint max_element, Arena *arena);
+
+ // Exchange two sets
+ void swap(IndexSet *set);
+
+ //-------------------------- Debugging and statistics --------------------------
+
+#ifndef PRODUCT
+ // Output an IndexSet for debugging
+ void dump() const;
+#endif
+
+#ifdef ASSERT
+ void tally_iteration_statistics() const;
+
+ // BitBlock allocation statistics
+ static uint _alloc_new;
+ static uint _alloc_total;
+
+ // Block density statistics
+ static long _total_bits;
+ static long _total_used_blocks;
+ static long _total_unused_blocks;
+
+ // Sanity tests
+ void verify() const;
+
+ static int _serial_count;
+ int _serial_number;
+
+ // Check to see if the serial number of the current set is the one we're tracing.
+ // If it is, print a message.
+ void check_watch(const char *operation, uint operand) const {
+ if (IndexSetWatch != 0) {
+ if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
+ tty->print_cr("IndexSet %d : %s ( %d )", _serial_number, operation, operand);
+ }
+ }
+ }
+ void check_watch(const char *operation) const {
+ if (IndexSetWatch != 0) {
+ if (IndexSetWatch == -1 || _serial_number == IndexSetWatch) {
+ tty->print_cr("IndexSet %d : %s", _serial_number, operation);
+ }
+ }
+ }
+
+ public:
+ static void print_statistics();
+
+#endif
+};
+
+
+//-------------------------------- class IndexSetIterator --------------------
+// An iterator for IndexSets.
+
+class IndexSetIterator VALUE_OBJ_CLASS_SPEC {
+ friend class IndexSet;
+
+ public:
+
+ // We walk over the bits in a word in chunks of size window_size.
+ enum { window_size = 5,
+ window_mask = right_n_bits(window_size),
+ table_size = (1 << window_size) };
+
+ // For an integer of length window_size, what is the first set bit?
+ static const byte _first_bit[table_size];
+
+ // For an integer of length window_size, what is the second set bit?
+ static const byte _second_bit[table_size];
+
+ private:
+ // The current word we are inspecting
+ uint32 _current;
+
+ // What element number are we currently on?
+ uint _value;
+
+ // The index of the next word we will inspect
+ uint _next_word;
+
+ // A pointer to the contents of the current block
+ uint32 *_words;
+
+ // The index of the next block we will inspect
+ uint _next_block;
+
+ // A pointer to the blocks in our set
+ IndexSet::BitBlock **_blocks;
+
+ // The number of blocks in the set
+ uint _max_blocks;
+
+ // If the iterator was created from a non-const set, we replace
+ // non-canonical empty blocks with the _empty_block pointer. If
+ // _set is NULL, we do no replacement.
+ IndexSet *_set;
+
+ // Advance to the next non-empty word and return the next
+ // element in the set.
+ uint advance_and_next();
+
+
+ public:
+
+ // If an iterator is built from a constant set then empty blocks
+ // are not canonicalized.
+ IndexSetIterator(IndexSet *set);
+ IndexSetIterator(const IndexSet *set);
+
+ // Return the next element of the set. Return 0 when done.
+ uint next() {
+ uint current = _current;
+ if (current != 0) {
+ uint value = _value;
+ while (mask_bits(current,window_mask) == 0) {
+ current >>= window_size;
+ value += window_size;
+ }
+
+ uint advance = _second_bit[mask_bits(current,window_mask)];
+ _current = current >> advance;
+ _value = value + advance;
+ return value + _first_bit[mask_bits(current,window_mask)];
+ } else {
+ return advance_and_next();
+ }
+ }
+};
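
For reference, here is a minimal standalone sketch of the windowed bit scan that IndexSetIterator::next() performs above, written in plain C++ with hypothetical names and a single first-bit table instead of the first/second-bit pair used by the real iterator. It is illustrative only, not HotSpot code.

// Illustrative sketch only; names and table layout are hypothetical.
#include <cstdio>

enum { win_size = 5,
       win_mask = (1 << win_size) - 1,
       tbl_size = (1 << win_size) };

static unsigned char first_bit[tbl_size];   // lowest set bit of a 5-bit value

static void build_table() {
  first_bit[0] = 0;                         // never consulted: zero windows are skipped
  for (int v = 1; v < tbl_size; v++) {
    int b = 0;
    while (((v >> b) & 1) == 0) b++;
    first_bit[v] = (unsigned char) b;
  }
}

// Visit every set bit of 'word', lowest first, scanning five bits at a time.
static void for_each_bit(unsigned word) {
  unsigned value = 0;                       // element number of the window start
  while (word != 0) {
    while ((word & win_mask) == 0) {        // skip empty 5-bit windows
      word  >>= win_size;
      value  += win_size;
    }
    unsigned bit = first_bit[word & win_mask];
    printf("bit %u is set\n", value + bit);
    word  >>= (bit + 1);                    // consume up to and including that bit
    value  += (bit + 1);
  }
}

int main() {
  build_table();
  for_each_bit(0x80000401u);                // prints 0, 10, 31
  return 0;
}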
diff --git a/src/share/vm/opto/lcm.cpp b/src/share/vm/opto/lcm.cpp
new file mode 100644
index 000000000..5b0ddae99
--- /dev/null
+++ b/src/share/vm/opto/lcm.cpp
@@ -0,0 +1,934 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_lcm.cpp.incl"
+
+//------------------------------implicit_null_check----------------------------
+// Detect implicit-null-check opportunities. Basically, find NULL checks
+// with suitable memory ops nearby. Use the memory op to do the NULL check.
+// I can generate a memory op if there is not one nearby.
+// The proj is the control projection for the not-null case.
+// The val is the pointer being checked for nullness.
+void Block::implicit_null_check(PhaseCFG *cfg, Node *proj, Node *val, int allowed_reasons) {
+ // Assume that if a null check is needed for offset 0, it is always needed.
+ // Intel Solaris doesn't support any null checks yet and no
+ // mechanism exists (yet) to set the switches at an os_cpu level.
+ if( !ImplicitNullChecks || MacroAssembler::needs_explicit_null_check(0)) return;
+
+ // Make sure the ptr-is-null path appears to be uncommon!
+ float f = end()->as_MachIf()->_prob;
+ if( proj->Opcode() == Op_IfTrue ) f = 1.0f - f;
+ if( f > PROB_UNLIKELY_MAG(4) ) return;
+
+ uint bidx = 0; // Capture index of value into memop
+ bool was_store; // Memory op is a store op
+
+ // Get the successor block for if the test ptr is non-null
+ Block* not_null_block; // this one goes with the proj
+ Block* null_block;
+ if (_nodes[_nodes.size()-1] == proj) {
+ null_block = _succs[0];
+ not_null_block = _succs[1];
+ } else {
+ assert(_nodes[_nodes.size()-2] == proj, "proj is one or the other");
+ not_null_block = _succs[0];
+ null_block = _succs[1];
+ }
+
+ // Search the exception block for an uncommon trap.
+ // (See Parse::do_if and Parse::do_ifnull for the reason
+ // we need an uncommon trap. Briefly, we need a way to
+ // detect failure of this optimization, as in 6366351.)
+ {
+ bool found_trap = false;
+ for (uint i1 = 0; i1 < null_block->_nodes.size(); i1++) {
+ Node* nn = null_block->_nodes[i1];
+ if (nn->is_MachCall() &&
+ nn->as_MachCall()->entry_point() ==
+ SharedRuntime::uncommon_trap_blob()->instructions_begin()) {
+ const Type* trtype = nn->in(TypeFunc::Parms)->bottom_type();
+ if (trtype->isa_int() && trtype->is_int()->is_con()) {
+ jint tr_con = trtype->is_int()->get_con();
+ Deoptimization::DeoptReason reason = Deoptimization::trap_request_reason(tr_con);
+ Deoptimization::DeoptAction action = Deoptimization::trap_request_action(tr_con);
+ assert((int)reason < (int)BitsPerInt, "recode bit map");
+ if (is_set_nth_bit(allowed_reasons, (int) reason)
+ && action != Deoptimization::Action_none) {
+ // This uncommon trap is sure to recompile, eventually.
+ // When that happens, C->too_many_traps will prevent
+ // this transformation from happening again.
+ found_trap = true;
+ }
+ }
+ break;
+ }
+ }
+ if (!found_trap) {
+ // We did not find an uncommon trap.
+ return;
+ }
+ }
+
+ // Search the successor block for a load or store whose base value is also
+ // the tested value. There may be several.
+ Node_List *out = new Node_List(Thread::current()->resource_area());
+ MachNode *best = NULL; // Best found so far
+ for (DUIterator i = val->outs(); val->has_out(i); i++) {
+ Node *m = val->out(i);
+ if( !m->is_Mach() ) continue;
+ MachNode *mach = m->as_Mach();
+ was_store = false;
+ switch( mach->ideal_Opcode() ) {
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadL:
+ case Op_LoadP:
+ case Op_LoadS:
+ case Op_LoadKlass:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ break;
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StoreD:
+ case Op_StoreF:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreP:
+ was_store = true; // Memory op is a store op
+ // Stores will have their address in slot 2 (memory in slot 1).
+ // If the value being null-checked is in another slot, it means we
+ // are storing the checked value, which does NOT check the value!
+ if( mach->in(2) != val ) continue;
+ break; // Found a memory op?
+ case Op_StrComp:
+ // Not a legit memory op for implicit null check regardless of
+ // embedded loads
+ continue;
+ default: // Also check for embedded loads
+ if( !mach->needs_anti_dependence_check() )
+ continue; // Not a memory op; skip it
+ break;
+ }
+ // Check that the offset is not too large for an implicit exception
+ {
+ intptr_t offset = 0;
+ const TypePtr *adr_type = NULL; // Do not need this return value here
+ const Node* base = mach->get_base_and_disp(offset, adr_type);
+ if (base == NULL || base == NodeSentinel) {
+ // Cannot reason about it; it is probably not an implicit null exception
+ } else {
+ const TypePtr* tptr = base->bottom_type()->is_ptr();
+ // Give up if offset is not a compile-time constant
+ if( offset == Type::OffsetBot || tptr->_offset == Type::OffsetBot )
+ continue;
+ offset += tptr->_offset; // adjust if the base itself carries an offset
+ if( MacroAssembler::needs_explicit_null_check(offset) )
+ continue; // Give up if the reference is beyond the 4K page size
+ }
+ }
+
+ // Check ctrl input to see if the null-check dominates the memory op
+ Block *cb = cfg->_bbs[mach->_idx];
+ cb = cb->_idom; // Always hoist at least 1 block
+ if( !was_store ) { // Stores can be hoisted only one block
+ while( cb->_dom_depth > (_dom_depth + 1))
+ cb = cb->_idom; // Hoist loads as far as we want
+ // The non-null-block should dominate the memory op, too. Live
+ // range spilling will insert a spill in the non-null-block if it
+ // needs to spill the memory op for an implicit null check.
+ if (cb->_dom_depth == (_dom_depth + 1)) {
+ if (cb != not_null_block) continue;
+ cb = cb->_idom;
+ }
+ }
+ if( cb != this ) continue;
+
+ // Found a memory user; see if it can be hoisted to check-block
+ uint vidx = 0; // Capture index of value into memop
+ uint j;
+ for( j = mach->req()-1; j > 0; j-- ) {
+ if( mach->in(j) == val ) vidx = j;
+ // Block of memory-op input
+ Block *inb = cfg->_bbs[mach->in(j)->_idx];
+ Block *b = this; // Start from nul check
+ while( b != inb && b->_dom_depth > inb->_dom_depth )
+ b = b->_idom; // search upwards for input
+ // See if input dominates null check
+ if( b != inb )
+ break;
+ }
+ if( j > 0 )
+ continue;
+ Block *mb = cfg->_bbs[mach->_idx];
+ // Hoisting stores requires more checks for the anti-dependence case.
+ // Give up hoisting if we have to move the store past any load.
+ if( was_store ) {
+ Block *b = mb; // Start searching here for a local load
+ // mach use (faulting) trying to hoist
+ // n might be blocker to hoisting
+ while( b != this ) {
+ uint k;
+ for( k = 1; k < b->_nodes.size(); k++ ) {
+ Node *n = b->_nodes[k];
+ if( n->needs_anti_dependence_check() &&
+ n->in(LoadNode::Memory) == mach->in(StoreNode::Memory) )
+ break; // Found anti-dependent load
+ }
+ if( k < b->_nodes.size() )
+ break; // Found anti-dependent load
+ // Make sure control does not do a merge (we would have to check all paths)
+ if( b->num_preds() != 2 ) break;
+ b = cfg->_bbs[b->pred(1)->_idx]; // Move up to predecessor block
+ }
+ if( b != this ) continue;
+ }
+
+ // Make sure this memory op is not already being used for a NullCheck
+ Node *e = mb->end();
+ if( e->is_MachNullCheck() && e->in(1) == mach )
+ continue; // Already being used as a NULL check
+
+ // Found a candidate! Pick one with least dom depth - the highest
+ // in the dom tree should be closest to the null check.
+ if( !best ||
+ cfg->_bbs[mach->_idx]->_dom_depth < cfg->_bbs[best->_idx]->_dom_depth ) {
+ best = mach;
+ bidx = vidx;
+
+ }
+ }
+ // No candidate!
+ if( !best ) return;
+
+ // ---- Found an implicit null check
+ extern int implicit_null_checks;
+ implicit_null_checks++;
+
+ // Hoist the memory candidate up to the end of the test block.
+ Block *old_block = cfg->_bbs[best->_idx];
+ old_block->find_remove(best);
+ add_inst(best);
+ cfg->_bbs.map(best->_idx,this);
+
+ // Move the control dependence
+ if (best->in(0) && best->in(0) == old_block->_nodes[0])
+ best->set_req(0, _nodes[0]);
+
+ // Check for flag-killing projections that also need to be hoisted
+ // Should be DU safe because no edge updates.
+ for (DUIterator_Fast jmax, j = best->fast_outs(jmax); j < jmax; j++) {
+ Node* n = best->fast_out(j);
+ if( n->Opcode() == Op_MachProj ) {
+ cfg->_bbs[n->_idx]->find_remove(n);
+ add_inst(n);
+ cfg->_bbs.map(n->_idx,this);
+ }
+ }
+
+ Compile *C = cfg->C;
+ // proj==Op_IfTrue --> ne test; proj==Op_IfFalse --> eq test.
+ // One of two graph shapes got matched:
+ // (IfTrue (If (Bool NE (CmpP ptr NULL))))
+ // (IfFalse (If (Bool EQ (CmpP ptr NULL))))
+ // NULL checks are always branch-if-eq. If we see an IfTrue projection
+ // then we are replacing a 'ne' test with an 'eq' NULL check test.
+ // We need to flip the projections to keep the same semantics.
+ if( proj->Opcode() == Op_IfTrue ) {
+ // Swap order of projections in basic block to swap branch targets
+ Node *tmp1 = _nodes[end_idx()+1];
+ Node *tmp2 = _nodes[end_idx()+2];
+ _nodes.map(end_idx()+1, tmp2);
+ _nodes.map(end_idx()+2, tmp1);
+ Node *tmp = new (C, 1) Node(C->top()); // Use a non-NULL input
+ tmp1->replace_by(tmp);
+ tmp2->replace_by(tmp1);
+ tmp->replace_by(tmp2);
+ tmp->destruct();
+ }
+
+ // Remove the existing null check; use a new implicit null check instead.
+ // Since schedule-local needs precise def-use info, we need to correct
+ // it as well.
+ Node *old_tst = proj->in(0);
+ MachNode *nul_chk = new (C) MachNullCheckNode(old_tst->in(0),best,bidx);
+ _nodes.map(end_idx(),nul_chk);
+ cfg->_bbs.map(nul_chk->_idx,this);
+ // Redirect users of old_tst to nul_chk
+ for (DUIterator_Last i2min, i2 = old_tst->last_outs(i2min); i2 >= i2min; --i2)
+ old_tst->last_out(i2)->set_req(0, nul_chk);
+ // Clean-up any dead code
+ for (uint i3 = 0; i3 < old_tst->req(); i3++)
+ old_tst->set_req(i3, NULL);
+
+ cfg->latency_from_uses(nul_chk);
+ cfg->latency_from_uses(best);
+}
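
The hoisting legality test above walks up the immediate-dominator chain from the memory op's block: loads may climb several dominator levels (stepping through the not-null successor), while stores may move only one block. The following toy sketch models that walk under stated assumptions; ToyBlock and its fields are hypothetical stand-ins for Block, and the null guards are only defensive.

// Illustrative sketch only; ToyBlock and its fields are hypothetical.
#include <cstddef>

struct ToyBlock {
  ToyBlock* idom;        // immediate dominator
  unsigned  dom_depth;   // depth in the dominator tree
};

// May a memory op living in 'op_block' be hoisted into 'test_block' (the block
// ending with the null test)?  Loads may climb several dominator levels and
// must pass through the not-null successor; stores may hoist only one block.
static bool can_hoist_to(ToyBlock* test_block, ToyBlock* not_null_block,
                         ToyBlock* op_block, bool is_store) {
  ToyBlock* cb = op_block->idom;                 // always hoist at least one block
  if (!is_store) {
    while (cb != NULL && cb->dom_depth > test_block->dom_depth + 1)
      cb = cb->idom;                             // climb as far as needed
    if (cb != NULL && cb->dom_depth == test_block->dom_depth + 1) {
      if (cb != not_null_block) return false;    // must come through the non-null path
      cb = cb->idom;
    }
  }
  return cb == test_block;
}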
+
+
+//------------------------------select-----------------------------------------
+// Select a nice fellow from the worklist to schedule next. If there is only
+// one choice, then use it. Projections take top priority for correctness
+// reasons - if I see a projection, then it is next. There are a number of
+// other special cases, for instructions that consume condition codes, et al.
+// These are chosen immediately. Some instructions are required to immediately
+// precede the last instruction in the block, and these are taken last. Of the
+// remaining cases (most), choose the instruction with the greatest latency
+ // (that is, the greatest number of pseudo-cycles required to the end of the
+// routine). If there is a tie, choose the instruction with the most inputs.
+Node *Block::select(PhaseCFG *cfg, Node_List &worklist, int *ready_cnt, VectorSet &next_call, uint sched_slot) {
+
+ // If there is only a single entry on the worklist, use it
+ uint cnt = worklist.size();
+ if (cnt == 1) {
+ Node *n = worklist[0];
+ worklist.map(0,worklist.pop());
+ return n;
+ }
+
+ uint choice = 0; // Bigger is most important
+ uint latency = 0; // Bigger is scheduled first
+ uint score = 0; // Bigger is better
+ uint idx; // Index in worklist
+
+ for( uint i=0; i<cnt; i++ ) { // Inspect entire worklist
+ // Order in worklist is used to break ties.
+ // See caller for how this is used to delay scheduling
+ // of induction variable increments to after the other
+ // uses of the phi are scheduled.
+ Node *n = worklist[i]; // Get Node on worklist
+
+ int iop = n->is_Mach() ? n->as_Mach()->ideal_Opcode() : 0;
+ if( n->is_Proj() || // Projections always win
+ n->Opcode()== Op_Con || // So does constant 'Top'
+ iop == Op_CreateEx || // Create-exception must start block
+ iop == Op_CheckCastPP
+ ) {
+ worklist.map(i,worklist.pop());
+ return n;
+ }
+
+ // Final call in a block must be adjacent to 'catch'
+ Node *e = end();
+ if( e->is_Catch() && e->in(0)->in(0) == n )
+ continue;
+
+ // Memory op for an implicit null check has to be at the end of the block
+ if( e->is_MachNullCheck() && e->in(1) == n )
+ continue;
+
+ uint n_choice = 2;
+
+ // See if this instruction is consumed by a branch. If so, then (as the
+ // branch is the last instruction in the basic block) force it to the
+ // end of the basic block
+ if ( must_clone[iop] ) {
+ // See if any use is a branch
+ bool found_machif = false;
+
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+
+ // The use is a conditional branch, make them adjacent
+ if (use->is_MachIf() && cfg->_bbs[use->_idx]==this ) {
+ found_machif = true;
+ break;
+ }
+
+ // If more than this instruction is still pending for the successor
+ // to become ready, don't choose this one while other opportunities
+ // are ready.
+ if (ready_cnt[use->_idx] > 1)
+ n_choice = 1;
+ }
+
+ // A branch uses this instruction, so prefer not to schedule it yet
+ if (found_machif)
+ continue;
+ }
+
+ // See if this has a predecessor that is "must_clone", i.e. sets the
+ // condition code. If so, choose this first
+ for (uint j = 0; j < n->req() ; j++) {
+ Node *inn = n->in(j);
+ if (inn) {
+ if (inn->is_Mach() && must_clone[inn->as_Mach()->ideal_Opcode()] ) {
+ n_choice = 3;
+ break;
+ }
+ }
+ }
+
+ // MachTemps should be scheduled last so they are near their uses
+ if (n->is_MachTemp()) {
+ n_choice = 1;
+ }
+
+ uint n_latency = cfg->_node_latency.at_grow(n->_idx);
+ uint n_score = n->req(); // Many inputs get high score to break ties
+
+ // Keep best latency found
+ if( choice < n_choice ||
+ ( choice == n_choice &&
+ ( latency < n_latency ||
+ ( latency == n_latency &&
+ ( score < n_score ))))) {
+ choice = n_choice;
+ latency = n_latency;
+ score = n_score;
+ idx = i; // Also keep index in worklist
+ }
+ } // End of for all ready nodes in worklist
+
+ Node *n = worklist[idx]; // Get the winner
+
+ worklist.map(idx,worklist.pop()); // Compress worklist
+ return n;
+}
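
The winner above is chosen by a lexicographic (choice, latency, score) comparison, with worklist order breaking any remaining tie. Below is a self-contained sketch of that comparison on plain arrays; the names and the sample data are illustrative only, not part of the source.

// Illustrative sketch only; Candidate and pick_best are hypothetical names.
#include <cstdio>

struct Candidate { unsigned choice, latency, score; };

// Return the index of the best candidate under the same ordering as select():
// bigger choice first, then bigger latency, then bigger score; the earlier
// index wins remaining ties (worklist order breaks ties).
static unsigned pick_best(const Candidate* cand, unsigned cnt) {
  unsigned idx = 0, choice = 0, latency = 0, score = 0;
  for (unsigned i = 0; i < cnt; i++) {
    const Candidate& c = cand[i];
    if (choice  <  c.choice ||
        (choice == c.choice &&
         (latency  <  c.latency ||
          (latency == c.latency && score < c.score)))) {
      choice = c.choice; latency = c.latency; score = c.score; idx = i;
    }
  }
  return idx;
}

int main() {
  Candidate w[] = { {2, 7, 3}, {3, 1, 1}, {3, 1, 4}, {2, 9, 9} };
  printf("best = %u\n", pick_best(w, 4));     // prints 2
  return 0;
}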
+
+
+//------------------------------set_next_call----------------------------------
+void Block::set_next_call( Node *n, VectorSet &next_call, Block_Array &bbs ) {
+ if( next_call.test_set(n->_idx) ) return;
+ for( uint i=0; i<n->len(); i++ ) {
+ Node *m = n->in(i);
+ if( !m ) continue; // must see all nodes in block that precede call
+ if( bbs[m->_idx] == this )
+ set_next_call( m, next_call, bbs );
+ }
+}
+
+//------------------------------needed_for_next_call---------------------------
+// Set the flag 'next_call' for each Node that is needed for the next call to
+// be scheduled. This flag lets me bias scheduling so Nodes needed for the
+// next subroutine call get priority - basically it moves things NOT needed
+// for the next call till after the call. This prevents me from trying to
+// carry lots of stuff live across a call.
+void Block::needed_for_next_call(Node *this_call, VectorSet &next_call, Block_Array &bbs) {
+ // Find the next control-defining Node in this block
+ Node* call = NULL;
+ for (DUIterator_Fast imax, i = this_call->fast_outs(imax); i < imax; i++) {
+ Node* m = this_call->fast_out(i);
+ if( bbs[m->_idx] == this && // Local-block user
+ m != this_call && // Not self-start node
+ m->is_Call() ) {
+ call = m;
+ break;
+ }
+ }
+ if (call == NULL) return; // No next call (e.g., block end is near)
+ // Set next-call for all inputs to this call
+ set_next_call(call, next_call, bbs);
+}
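
set_next_call() above is a depth-first marking of every same-block node that feeds the upcoming call. A toy model of that recursion follows, using a std::set in place of the VectorSet; ToyNode and mark_needed are hypothetical names for illustration only.

// Illustrative sketch only; ToyNode and its fields are hypothetical.
#include <cstddef>
#include <set>
#include <vector>

struct ToyNode {
  int                   block_id;   // which basic block the node landed in
  std::vector<ToyNode*> inputs;     // inputs (slots may be NULL)
};

// Mark n and, transitively, every same-block input of n.  The 'marked' set
// plays the role of the VectorSet next_call in Block::set_next_call().
static void mark_needed(ToyNode* n, int block_id, std::set<ToyNode*>& marked) {
  if (!marked.insert(n).second) return;          // already marked: stop recursing
  for (size_t i = 0; i < n->inputs.size(); i++) {
    ToyNode* m = n->inputs[i];
    if (m != NULL && m->block_id == block_id)    // stay within the current block
      mark_needed(m, block_id, marked);
  }
}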
+
+//------------------------------sched_call-------------------------------------
+uint Block::sched_call( Matcher &matcher, Block_Array &bbs, uint node_cnt, Node_List &worklist, int *ready_cnt, MachCallNode *mcall, VectorSet &next_call ) {
+ RegMask regs;
+
+ // Schedule all the users of the call right now. All the users are
+ // projection Nodes, so they must be scheduled next to the call.
+ // Collect all the defined registers.
+ for (DUIterator_Fast imax, i = mcall->fast_outs(imax); i < imax; i++) {
+ Node* n = mcall->fast_out(i);
+ assert( n->Opcode()==Op_MachProj, "" );
+ --ready_cnt[n->_idx];
+ assert( !ready_cnt[n->_idx], "" );
+ // Schedule next to call
+ _nodes.map(node_cnt++, n);
+ // Collect defined registers
+ regs.OR(n->out_RegMask());
+ // Check for scheduling the next control-definer
+ if( n->bottom_type() == Type::CONTROL )
+ // Warm up next pile of heuristic bits
+ needed_for_next_call(n, next_call, bbs);
+
+ // Children of projections are now all ready
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j); // Get user
+ if( bbs[m->_idx] != this ) continue;
+ if( m->is_Phi() ) continue;
+ if( !--ready_cnt[m->_idx] )
+ worklist.push(m);
+ }
+
+ }
+
+ // Act as if the call defines the Frame Pointer.
+ // Certainly the FP is alive and well after the call.
+ regs.Insert(matcher.c_frame_pointer());
+
+ // Set all registers killed and not already defined by the call.
+ uint r_cnt = mcall->tf()->range()->cnt();
+ int op = mcall->ideal_Opcode();
+ MachProjNode *proj = new (matcher.C, 1) MachProjNode( mcall, r_cnt+1, RegMask::Empty, MachProjNode::fat_proj );
+ bbs.map(proj->_idx,this);
+ _nodes.insert(node_cnt++, proj);
+
+ // Select the right register save policy.
+ const char * save_policy;
+ switch (op) {
+ case Op_CallRuntime:
+ case Op_CallLeaf:
+ case Op_CallLeafNoFP:
+ // Calling C code so use C calling convention
+ save_policy = matcher._c_reg_save_policy;
+ break;
+
+ case Op_CallStaticJava:
+ case Op_CallDynamicJava:
+ // Calling Java code so use Java calling convention
+ save_policy = matcher._register_save_policy;
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ // When using CallRuntime mark SOE registers as killed by the call
+ // so values that could show up in the RegisterMap aren't live in a
+ // callee saved register since the register wouldn't know where to
+ // find them. CallLeaf and CallLeafNoFP are ok because they can't
+ // have debug info on them. Strictly speaking this only needs to be
+ // done for oops since idealreg2debugmask takes care of debug info
+ // references, but there is no way to handle oops differently than other
+ // pointers as far as the kill mask goes.
+ bool exclude_soe = op == Op_CallRuntime;
+
+ // Fill in the kill mask for the call
+ for( OptoReg::Name r = OptoReg::Name(0); r < _last_Mach_Reg; r=OptoReg::add(r,1) ) {
+ if( !regs.Member(r) ) { // Not already defined by the call
+ // Save-on-call register?
+ if ((save_policy[r] == 'C') ||
+ (save_policy[r] == 'A') ||
+ ((save_policy[r] == 'E') && exclude_soe)) {
+ proj->_rout.Insert(r);
+ }
+ }
+ }
+
+ return node_cnt;
+}
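
The kill-mask loop above adds every register that the call does not already define and whose save-policy letter is 'C', 'A', or 'E' (the last only when SOE registers are excluded). The sketch below reproduces that fill on a std::bitset; the register count and names are made up for illustration and only the policy letters mirror the code above.

// Illustrative sketch only; the bitset and REG_COUNT stand in for RegMask/OptoReg.
#include <bitset>

static const int REG_COUNT = 32;               // hypothetical register count

static std::bitset<REG_COUNT> call_kill_mask(const char* save_policy,
                                             const std::bitset<REG_COUNT>& defined,
                                             bool exclude_soe) {
  std::bitset<REG_COUNT> kills;
  for (int r = 0; r < REG_COUNT && save_policy[r] != '\0'; r++) {
    if (defined.test(r)) continue;             // already defined by the call
    char p = save_policy[r];
    if (p == 'C' || p == 'A' || (p == 'E' && exclude_soe))
      kills.set(r);                            // killed across the call
  }
  return kills;
}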
+
+
+//------------------------------schedule_local---------------------------------
+ // Topological sort within a block. Someday this will become a real scheduler.
+bool Block::schedule_local(PhaseCFG *cfg, Matcher &matcher, int *ready_cnt, VectorSet &next_call) {
+ // Already "sorted" are the block start Node (as the first entry), and
+ // the block-ending Node and any trailing control projections. We leave
+ // these alone. PhiNodes and ParmNodes are made to follow the block start
+ // Node. Everything else gets topo-sorted.
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print_cr("# --- schedule_local B%d, before: ---", _pre_order);
+ for (uint i = 0;i < _nodes.size();i++) {
+ tty->print("# ");
+ _nodes[i]->fast_dump();
+ }
+ tty->print_cr("#");
+ }
+#endif
+
+ // RootNode is already sorted
+ if( _nodes.size() == 1 ) return true;
+
+ // Move PhiNodes and ParmNodes from 1 to cnt up to the start
+ uint node_cnt = end_idx();
+ uint phi_cnt = 1;
+ uint i;
+ for( i = 1; i<node_cnt; i++ ) { // Scan for Phi
+ Node *n = _nodes[i];
+ if( n->is_Phi() || // Found a PhiNode or ParmNode
+ (n->is_Proj() && n->in(0) == head()) ) {
+ // Swap the node at 'phi_cnt' down to slot i; makes a hole at phi_cnt
+ _nodes.map(i,_nodes[phi_cnt]);
+ _nodes.map(phi_cnt++,n); // swap Phi/Parm up front
+ } else { // All others
+ // Count block-local inputs to 'n'
+ uint cnt = n->len(); // Input count
+ uint local = 0;
+ for( uint j=0; j<cnt; j++ ) {
+ Node *m = n->in(j);
+ if( m && cfg->_bbs[m->_idx] == this && !m->is_top() )
+ local++; // One more block-local input
+ }
+ ready_cnt[n->_idx] = local; // Count em up
+
+ // A few node types require changing a required edge to a precedence edge
+ // before allocation.
+ if( UseConcMarkSweepGC ) {
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_StoreCM ) {
+ // Note: Required edges with an index greater than oper_input_base
+ // are not supported by the allocator.
+ // Note2: Can only depend on unmatched edge being last,
+ // can not depend on its absolute position.
+ Node *oop_store = n->in(n->req() - 1);
+ n->del_req(n->req() - 1);
+ n->add_prec(oop_store);
+ assert(cfg->_bbs[oop_store->_idx]->_dom_depth <= this->_dom_depth, "oop_store must dominate card-mark");
+ }
+ }
+ if( n->is_Mach() && n->as_Mach()->ideal_Opcode() == Op_MemBarAcquire ) {
+ Node *x = n->in(TypeFunc::Parms);
+ n->del_req(TypeFunc::Parms);
+ n->add_prec(x);
+ }
+ }
+ }
+ for(uint i2=i; i2<_nodes.size(); i2++ ) // Trailing guys get zapped count
+ ready_cnt[_nodes[i2]->_idx] = 0;
+
+ // All the prescheduled guys do not hold back internal nodes
+ uint i3;
+ for(i3 = 0; i3<phi_cnt; i3++ ) { // For all pre-scheduled
+ Node *n = _nodes[i3]; // Get pre-scheduled
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j);
+ if( cfg->_bbs[m->_idx] ==this ) // Local-block user
+ ready_cnt[m->_idx]--; // Fix ready count
+ }
+ }
+
+ Node_List delay;
+ // Make a worklist
+ Node_List worklist;
+ for(uint i4=i3; i4<node_cnt; i4++ ) { // Put ready guys on worklist
+ Node *m = _nodes[i4];
+ if( !ready_cnt[m->_idx] ) { // Zero ready count?
+ if (m->is_iteratively_computed()) {
+ // Push induction variable increments last to allow other uses
+ // of the phi to be scheduled first. The select() method breaks
+ // ties in scheduling by worklist order.
+ delay.push(m);
+ } else {
+ worklist.push(m); // Then on to worklist!
+ }
+ }
+ }
+ while (delay.size()) {
+ Node* d = delay.pop();
+ worklist.push(d);
+ }
+
+ // Warm up the 'next_call' heuristic bits
+ needed_for_next_call(_nodes[0], next_call, cfg->_bbs);
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ for (uint j=0; j<_nodes.size(); j++) {
+ Node *n = _nodes[j];
+ int idx = n->_idx;
+ tty->print("# ready cnt:%3d ", ready_cnt[idx]);
+ tty->print("latency:%3d ", cfg->_node_latency.at_grow(idx));
+ tty->print("%4d: %s\n", idx, n->Name());
+ }
+ }
+#endif
+
+ // Pull from worklist and schedule
+ while( worklist.size() ) { // Worklist is not empty
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print("# ready list:");
+ for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
+ Node *n = worklist[i]; // Get Node on worklist
+ tty->print(" %d", n->_idx);
+ }
+ tty->cr();
+ }
+#endif
+
+ // Select and pop a ready guy from worklist
+ Node* n = select(cfg, worklist, ready_cnt, next_call, phi_cnt);
+ _nodes.map(phi_cnt++,n); // Schedule him next
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print("# select %d: %s", n->_idx, n->Name());
+ tty->print(", latency:%d", cfg->_node_latency.at_grow(n->_idx));
+ n->dump();
+ if (Verbose) {
+ tty->print("# ready list:");
+ for( uint i=0; i<worklist.size(); i++ ) { // Inspect entire worklist
+ Node *n = worklist[i]; // Get Node on worklist
+ tty->print(" %d", n->_idx);
+ }
+ tty->cr();
+ }
+ }
+
+#endif
+ if( n->is_MachCall() ) {
+ MachCallNode *mcall = n->as_MachCall();
+ phi_cnt = sched_call(matcher, cfg->_bbs, phi_cnt, worklist, ready_cnt, mcall, next_call);
+ continue;
+ }
+ // Children are now all ready
+ for (DUIterator_Fast i5max, i5 = n->fast_outs(i5max); i5 < i5max; i5++) {
+ Node* m = n->fast_out(i5); // Get user
+ if( cfg->_bbs[m->_idx] != this ) continue;
+ if( m->is_Phi() ) continue;
+ if( !--ready_cnt[m->_idx] )
+ worklist.push(m);
+ }
+ }
+
+ if( phi_cnt != end_idx() ) {
+ // Did not schedule all nodes. Retry, bail out, or die.
+ Compile* C = matcher.C;
+ if (C->subsume_loads() == true && !C->failing()) {
+ // Retry with subsume_loads == false
+ // If this is the first failure, the sentinel string will "stick"
+ // to the Compile object, and the C2Compiler will see it and retry.
+ C->record_failure(C2Compiler::retry_no_subsuming_loads());
+ }
+ // assert( phi_cnt == end_idx(), "did not schedule all" );
+ return false;
+ }
+
+#ifndef PRODUCT
+ if (cfg->trace_opto_pipelining()) {
+ tty->print_cr("#");
+ tty->print_cr("# after schedule_local");
+ for (uint i = 0;i < _nodes.size();i++) {
+ tty->print("# ");
+ _nodes[i]->fast_dump();
+ }
+ tty->cr();
+ }
+#endif
+
+
+ return true;
+}
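
schedule_local() above is a list scheduler: count the block-local inputs of each node, seed a worklist with the zero-count nodes, then repeatedly pick one and decrement the counts of its users. The miniature standalone version below runs the same loop on a toy dependence DAG, with select() replaced by a trivial pop; all names are illustrative only.

// Illustrative sketch only; a toy ready-count list scheduler.
#include <vector>
#include <cstdio>

// deps[i] lists the nodes that node i depends on (its inputs).
static std::vector<unsigned> list_schedule(const std::vector<std::vector<unsigned> >& deps) {
  unsigned n = (unsigned) deps.size();
  std::vector<unsigned> ready_cnt(n, 0);
  std::vector<std::vector<unsigned> > users(n);
  for (unsigned i = 0; i < n; i++) {
    ready_cnt[i] = (unsigned) deps[i].size();
    for (unsigned j = 0; j < deps[i].size(); j++)
      users[deps[i][j]].push_back(i);
  }
  std::vector<unsigned> worklist, order;
  for (unsigned i = 0; i < n; i++)
    if (ready_cnt[i] == 0) worklist.push_back(i);
  while (!worklist.empty()) {
    unsigned cur = worklist.back();            // a real scheduler would call select()
    worklist.pop_back();
    order.push_back(cur);
    for (unsigned j = 0; j < users[cur].size(); j++)
      if (--ready_cnt[users[cur][j]] == 0)     // user became ready
        worklist.push_back(users[cur][j]);
  }
  return order;   // fewer than n entries means something never became ready
}

int main() {
  std::vector<std::vector<unsigned> > deps(4);
  deps[1].push_back(0);                        // 1 depends on 0
  deps[2].push_back(0);                        // 2 depends on 0
  deps[3].push_back(1); deps[3].push_back(2);  // 3 depends on 1 and 2
  std::vector<unsigned> order = list_schedule(deps);
  for (size_t i = 0; i < order.size(); i++) printf("%u ", order[i]);
  printf("\n");                                // e.g. "0 2 1 3"
  return 0;
}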
+
+//--------------------------catch_cleanup_fix_all_inputs-----------------------
+static void catch_cleanup_fix_all_inputs(Node *use, Node *old_def, Node *new_def) {
+ for (uint l = 0; l < use->len(); l++) {
+ if (use->in(l) == old_def) {
+ if (l < use->req()) {
+ use->set_req(l, new_def);
+ } else {
+ use->rm_prec(l);
+ use->add_prec(new_def);
+ l--;
+ }
+ }
+ }
+}
+
+//------------------------------catch_cleanup_find_cloned_def------------------
+static Node *catch_cleanup_find_cloned_def(Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+ assert( use_blk != def_blk, "Inter-block cleanup only");
+
+ // The use is some block below the Catch. Find and return the clone of the def
+ // that dominates the use. If there is no clone in a dominating block, then
+ // create a phi for the def in a dominating block.
+
+ // Find which successor block dominates this use. The successor
+ // blocks must all be single-entry (from the Catch only; I will have
+ // split blocks to make this so), hence they all dominate.
+ while( use_blk->_dom_depth > def_blk->_dom_depth+1 )
+ use_blk = use_blk->_idom;
+
+ // Find the successor
+ Node *fixup = NULL;
+
+ uint j;
+ for( j = 0; j < def_blk->_num_succs; j++ )
+ if( use_blk == def_blk->_succs[j] )
+ break;
+
+ if( j == def_blk->_num_succs ) {
+ // Block at the same level in the dom-tree is not a successor. It needs a
+ // PhiNode: the PhiNode merges the def's clones from the predecessors, and
+ // the use is then fixed up to reference the PhiNode.
+ Node_Array inputs = new Node_List(Thread::current()->resource_area());
+ for(uint k = 1; k < use_blk->num_preds(); k++) {
+ inputs.map(k, catch_cleanup_find_cloned_def(bbs[use_blk->pred(k)->_idx], def, def_blk, bbs, n_clone_idx));
+ }
+
+ // Check to see if the use_blk already has an identical phi inserted.
+ // If it exists, it will be at the first position since all uses of a
+ // def are processed together.
+ Node *phi = use_blk->_nodes[1];
+ if( phi->is_Phi() ) {
+ fixup = phi;
+ for (uint k = 1; k < use_blk->num_preds(); k++) {
+ if (phi->in(k) != inputs[k]) {
+ // Not a match
+ fixup = NULL;
+ break;
+ }
+ }
+ }
+
+ // If an existing PhiNode was not found, make a new one.
+ if (fixup == NULL) {
+ Node *new_phi = PhiNode::make(use_blk->head(), def);
+ use_blk->_nodes.insert(1, new_phi);
+ bbs.map(new_phi->_idx, use_blk);
+ for (uint k = 1; k < use_blk->num_preds(); k++) {
+ new_phi->set_req(k, inputs[k]);
+ }
+ fixup = new_phi;
+ }
+
+ } else {
+ // Found the use just below the Catch. Make it use the clone.
+ fixup = use_blk->_nodes[n_clone_idx];
+ }
+
+ return fixup;
+}
+
+//--------------------------catch_cleanup_intra_block--------------------------
+// Fix all input edges in use that reference "def". The use is in the same
+// block as the def and both have been cloned in each successor block.
+static void catch_cleanup_intra_block(Node *use, Node *def, Block *blk, int beg, int n_clone_idx) {
+
+ // Both the use and def have been cloned. For each successor block,
+ // get the clone of the use, and make its input the clone of the def
+ // found in that block.
+
+ uint use_idx = blk->find_node(use);
+ uint offset_idx = use_idx - beg;
+ for( uint k = 0; k < blk->_num_succs; k++ ) {
+ // Get clone in each successor block
+ Block *sb = blk->_succs[k];
+ Node *clone = sb->_nodes[offset_idx+1];
+ assert( clone->Opcode() == use->Opcode(), "" );
+
+ // Make use-clone reference the def-clone
+ catch_cleanup_fix_all_inputs(clone, def, sb->_nodes[n_clone_idx]);
+ }
+}
+
+//------------------------------catch_cleanup_inter_block---------------------
+// Fix all input edges in use that reference "def". The use is in a different
+// block than the def.
+static void catch_cleanup_inter_block(Node *use, Block *use_blk, Node *def, Block *def_blk, Block_Array &bbs, int n_clone_idx) {
+ if( !use_blk ) return; // Can happen if the use is a precedence edge
+
+ Node *new_def = catch_cleanup_find_cloned_def(use_blk, def, def_blk, bbs, n_clone_idx);
+ catch_cleanup_fix_all_inputs(use, def, new_def);
+}
+
+//------------------------------call_catch_cleanup-----------------------------
+ // If we inserted any instructions between a Call and its CatchNode,
+// clone the instructions on all paths below the Catch.
+void Block::call_catch_cleanup(Block_Array &bbs) {
+
+ // End of region to clone
+ uint end = end_idx();
+ if( !_nodes[end]->is_Catch() ) return;
+ // Start of region to clone
+ uint beg = end;
+ while( _nodes[beg-1]->Opcode() != Op_MachProj ||
+ !_nodes[beg-1]->in(0)->is_Call() ) {
+ beg--;
+ assert(beg > 0,"Catch cleanup walking beyond block boundary");
+ }
+ // Range of inserted instructions is [beg, end)
+ if( beg == end ) return;
+
+ // Clone along all Catch output paths. Clone area between the 'beg' and
+ // 'end' indices.
+ for( uint i = 0; i < _num_succs; i++ ) {
+ Block *sb = _succs[i];
+ // Clone the entire area; ignoring the edge fixup for now.
+ for( uint j = end; j > beg; j-- ) {
+ Node *clone = _nodes[j-1]->clone();
+ sb->_nodes.insert( 1, clone );
+ bbs.map(clone->_idx,sb);
+ }
+ }
+
+
+ // Fixup edges. Check the def-use info per cloned Node
+ for(uint i2 = beg; i2 < end; i2++ ) {
+ uint n_clone_idx = i2-beg+1; // Index of clone of n in each successor block
+ Node *n = _nodes[i2]; // Node that got cloned
+ // Need DU safe iterator because of edge manipulation in calls.
+ Unique_Node_List *out = new Unique_Node_List(Thread::current()->resource_area());
+ for (DUIterator_Fast j1max, j1 = n->fast_outs(j1max); j1 < j1max; j1++) {
+ out->push(n->fast_out(j1));
+ }
+ uint max = out->size();
+ for (uint j = 0; j < max; j++) {// For all users
+ Node *use = out->pop();
+ Block *buse = bbs[use->_idx];
+ if( use->is_Phi() ) {
+ for( uint k = 1; k < use->req(); k++ )
+ if( use->in(k) == n ) {
+ Node *fixup = catch_cleanup_find_cloned_def(bbs[buse->pred(k)->_idx], n, this, bbs, n_clone_idx);
+ use->set_req(k, fixup);
+ }
+ } else {
+ if (this == buse) {
+ catch_cleanup_intra_block(use, n, this, beg, n_clone_idx);
+ } else {
+ catch_cleanup_inter_block(use, buse, n, this, bbs, n_clone_idx);
+ }
+ }
+ } // End for all users
+
+ } // End of for all Nodes in cloned area
+
+ // Remove the now-dead cloned ops
+ for(uint i3 = beg; i3 < end; i3++ ) {
+ _nodes[beg]->disconnect_inputs(NULL);
+ _nodes.remove(beg);
+ }
+
+ // If the successor blocks have a CreateEx node, move it back to the top
+ for(uint i4 = 0; i4 < _num_succs; i4++ ) {
+ Block *sb = _succs[i4];
+ uint new_cnt = end - beg;
+ // Remove any newly created, but dead, nodes.
+ for( uint j = new_cnt; j > 0; j-- ) {
+ Node *n = sb->_nodes[j];
+ if (n->outcnt() == 0 &&
+ (!n->is_Proj() || n->as_Proj()->in(0)->outcnt() == 1) ){
+ n->disconnect_inputs(NULL);
+ sb->_nodes.remove(j);
+ new_cnt--;
+ }
+ }
+ // If any newly created nodes remain, move the CreateEx node to the top
+ if (new_cnt > 0) {
+ Node *cex = sb->_nodes[1+new_cnt];
+ if( cex->is_Mach() && cex->as_Mach()->ideal_Opcode() == Op_CreateEx ) {
+ sb->_nodes.remove(1+new_cnt);
+ sb->_nodes.insert(1,cex);
+ }
+ }
+ }
+}
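
The cloning step of call_catch_cleanup() above copies the nodes in [beg, end) into each successor, inserting just after the successor's head so the clones keep their original order. A toy sketch of that insertion pattern follows, with vectors of ints standing in for blocks of Nodes; everything here is a hypothetical illustration.

// Illustrative sketch only; ToyBlockNodes stands in for a block's node list.
#include <vector>

typedef std::vector<int> ToyBlockNodes;          // element 0 plays the block head

static void clone_range_into_succs(const ToyBlockNodes& blk, unsigned beg, unsigned end,
                                   std::vector<ToyBlockNodes>& succs) {
  for (size_t s = 0; s < succs.size(); s++) {
    // Walk the range backwards, inserting each clone at index 1, so the clones
    // end up in original order at indices [1, 1 + (end - beg)).
    for (unsigned j = end; j > beg; j--)
      succs[s].insert(succs[s].begin() + 1, blk[j - 1]);
  }
}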
diff --git a/src/share/vm/opto/library_call.cpp b/src/share/vm/opto/library_call.cpp
new file mode 100644
index 000000000..d78f62d40
--- /dev/null
+++ b/src/share/vm/opto/library_call.cpp
@@ -0,0 +1,4921 @@
+/*
+ * Copyright 1999-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_library_call.cpp.incl"
+
+class LibraryIntrinsic : public InlineCallGenerator {
+ // Extend the set of intrinsics known to the runtime:
+ public:
+ private:
+ bool _is_virtual;
+ vmIntrinsics::ID _intrinsic_id;
+
+ public:
+ LibraryIntrinsic(ciMethod* m, bool is_virtual, vmIntrinsics::ID id)
+ : InlineCallGenerator(m),
+ _is_virtual(is_virtual),
+ _intrinsic_id(id)
+ {
+ }
+ virtual bool is_intrinsic() const { return true; }
+ virtual bool is_virtual() const { return _is_virtual; }
+ virtual JVMState* generate(JVMState* jvms);
+ vmIntrinsics::ID intrinsic_id() const { return _intrinsic_id; }
+};
+
+
+// Local helper class for LibraryIntrinsic:
+class LibraryCallKit : public GraphKit {
+ private:
+ LibraryIntrinsic* _intrinsic; // the library intrinsic being called
+
+ public:
+ LibraryCallKit(JVMState* caller, LibraryIntrinsic* intrinsic)
+ : GraphKit(caller),
+ _intrinsic(intrinsic)
+ {
+ }
+
+ ciMethod* caller() const { return jvms()->method(); }
+ int bci() const { return jvms()->bci(); }
+ LibraryIntrinsic* intrinsic() const { return _intrinsic; }
+ vmIntrinsics::ID intrinsic_id() const { return _intrinsic->intrinsic_id(); }
+ ciMethod* callee() const { return _intrinsic->method(); }
+ ciSignature* signature() const { return callee()->signature(); }
+ int arg_size() const { return callee()->arg_size(); }
+
+ bool try_to_inline();
+
+ // Helper functions to inline natives
+ void push_result(RegionNode* region, PhiNode* value);
+ Node* generate_guard(Node* test, RegionNode* region, float true_prob);
+ Node* generate_slow_guard(Node* test, RegionNode* region);
+ Node* generate_fair_guard(Node* test, RegionNode* region);
+ Node* generate_negative_guard(Node* index, RegionNode* region,
+ // resulting CastII of index:
+ Node* *pos_index = NULL);
+ Node* generate_nonpositive_guard(Node* index, bool never_negative,
+ // resulting CastII of index:
+ Node* *pos_index = NULL);
+ Node* generate_limit_guard(Node* offset, Node* subseq_length,
+ Node* array_length,
+ RegionNode* region);
+ Node* generate_current_thread(Node* &tls_output);
+ address basictype2arraycopy(BasicType t, Node *src_offset, Node *dest_offset,
+ bool disjoint_bases, const char* &name);
+ Node* load_mirror_from_klass(Node* klass);
+ Node* load_klass_from_mirror_common(Node* mirror, bool never_see_null,
+ int nargs,
+ RegionNode* region, int null_path,
+ int offset);
+ Node* load_klass_from_mirror(Node* mirror, bool never_see_null, int nargs,
+ RegionNode* region, int null_path) {
+ int offset = java_lang_Class::klass_offset_in_bytes();
+ return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+ region, null_path,
+ offset);
+ }
+ Node* load_array_klass_from_mirror(Node* mirror, bool never_see_null,
+ int nargs,
+ RegionNode* region, int null_path) {
+ int offset = java_lang_Class::array_klass_offset_in_bytes();
+ return load_klass_from_mirror_common(mirror, never_see_null, nargs,
+ region, null_path,
+ offset);
+ }
+ Node* generate_access_flags_guard(Node* kls,
+ int modifier_mask, int modifier_bits,
+ RegionNode* region);
+ Node* generate_interface_guard(Node* kls, RegionNode* region);
+ Node* generate_array_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, false, false);
+ }
+ Node* generate_non_array_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, false, true);
+ }
+ Node* generate_objArray_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, true, false);
+ }
+ Node* generate_non_objArray_guard(Node* kls, RegionNode* region) {
+ return generate_array_guard_common(kls, region, true, true);
+ }
+ Node* generate_array_guard_common(Node* kls, RegionNode* region,
+ bool obj_array, bool not_array);
+ Node* generate_virtual_guard(Node* obj_klass, RegionNode* slow_region);
+ CallJavaNode* generate_method_call(vmIntrinsics::ID method_id,
+ bool is_virtual = false, bool is_static = false);
+ CallJavaNode* generate_method_call_static(vmIntrinsics::ID method_id) {
+ return generate_method_call(method_id, false, true);
+ }
+ CallJavaNode* generate_method_call_virtual(vmIntrinsics::ID method_id) {
+ return generate_method_call(method_id, true, false);
+ }
+
+ bool inline_string_compareTo();
+ bool inline_string_indexOf();
+ Node* string_indexOf(Node* string_object, ciTypeArray* target_array, jint offset, jint cache_i, jint md2_i);
+ Node* pop_math_arg();
+ bool runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName);
+ bool inline_math_native(vmIntrinsics::ID id);
+ bool inline_trig(vmIntrinsics::ID id);
+ bool inline_trans(vmIntrinsics::ID id);
+ bool inline_abs(vmIntrinsics::ID id);
+ bool inline_sqrt(vmIntrinsics::ID id);
+ bool inline_pow(vmIntrinsics::ID id);
+ bool inline_exp(vmIntrinsics::ID id);
+ bool inline_min_max(vmIntrinsics::ID id);
+ Node* generate_min_max(vmIntrinsics::ID id, Node* x, Node* y);
+ // This returns Type::AnyPtr, RawPtr, or OopPtr.
+ int classify_unsafe_addr(Node* &base, Node* &offset);
+ Node* make_unsafe_address(Node* base, Node* offset);
+ bool inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile);
+ bool inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static);
+ bool inline_unsafe_allocate();
+ bool inline_unsafe_copyMemory();
+ bool inline_native_currentThread();
+ bool inline_native_time_funcs(bool isNano);
+ bool inline_native_isInterrupted();
+ bool inline_native_Class_query(vmIntrinsics::ID id);
+ bool inline_native_subtype_check();
+
+ bool inline_native_newArray();
+ bool inline_native_getLength();
+ bool inline_array_copyOf(bool is_copyOfRange);
+ bool inline_native_clone(bool is_virtual);
+ bool inline_native_Reflection_getCallerClass();
+ bool inline_native_AtomicLong_get();
+ bool inline_native_AtomicLong_attemptUpdate();
+ bool is_method_invoke_or_aux_frame(JVMState* jvms);
+ // Helper function for inlining native object hash method
+ bool inline_native_hashcode(bool is_virtual, bool is_static);
+ bool inline_native_getClass();
+
+ // Helper functions for inlining arraycopy
+ bool inline_arraycopy();
+ void generate_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs, // arguments on stack for debug info
+ bool disjoint_bases = false,
+ bool length_never_negative = false,
+ RegionNode* slow_region = NULL);
+ AllocateArrayNode* tightly_coupled_allocation(Node* ptr,
+ RegionNode* slow_region);
+ void generate_clear_array(const TypePtr* adr_type,
+ Node* dest,
+ BasicType basic_elem_type,
+ Node* slice_off,
+ Node* slice_len,
+ Node* slice_end);
+ bool generate_block_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ AllocateNode* alloc,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* dest_size);
+ void generate_slow_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs);
+ Node* generate_checkcast_arraycopy(const TypePtr* adr_type,
+ Node* dest_elem_klass,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length, int nargs);
+ Node* generate_generic_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length, int nargs);
+ void generate_unchecked_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ bool disjoint_bases,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length);
+ bool inline_unsafe_CAS(BasicType type);
+ bool inline_unsafe_ordered_store(BasicType type);
+ bool inline_fp_conversions(vmIntrinsics::ID id);
+ bool inline_reverseBytes(vmIntrinsics::ID id);
+};
+
+
+//---------------------------make_vm_intrinsic----------------------------
+CallGenerator* Compile::make_vm_intrinsic(ciMethod* m, bool is_virtual) {
+ vmIntrinsics::ID id = m->intrinsic_id();
+ assert(id != vmIntrinsics::_none, "must be a VM intrinsic");
+
+ if (DisableIntrinsic[0] != '\0'
+ && strstr(DisableIntrinsic, vmIntrinsics::name_at(id)) != NULL) {
+ // disabled by a user request on the command line:
+ // example: -XX:DisableIntrinsic=_hashCode,_getClass
+ return NULL;
+ }
+
+ if (!m->is_loaded()) {
+ // do not attempt to inline unloaded methods
+ return NULL;
+ }
+
+ // Only a few intrinsics implement a virtual dispatch.
+ // They are expensive calls which are also frequently overridden.
+ if (is_virtual) {
+ switch (id) {
+ case vmIntrinsics::_hashCode:
+ case vmIntrinsics::_clone:
+ // OK, Object.hashCode and Object.clone intrinsics come in both flavors
+ break;
+ default:
+ return NULL;
+ }
+ }
+
+ // -XX:-InlineNatives disables nearly all intrinsics:
+ if (!InlineNatives) {
+ switch (id) {
+ case vmIntrinsics::_indexOf:
+ case vmIntrinsics::_compareTo:
+ break; // InlineNatives does not control String.compareTo
+ default:
+ return NULL;
+ }
+ }
+
+ switch (id) {
+ case vmIntrinsics::_compareTo:
+ if (!SpecialStringCompareTo) return NULL;
+ break;
+ case vmIntrinsics::_indexOf:
+ if (!SpecialStringIndexOf) return NULL;
+ break;
+ case vmIntrinsics::_arraycopy:
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_copyMemory:
+ if (StubRoutines::unsafe_arraycopy() == NULL) return NULL;
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_hashCode:
+ if (!InlineObjectHash) return NULL;
+ break;
+ case vmIntrinsics::_clone:
+ case vmIntrinsics::_copyOf:
+ case vmIntrinsics::_copyOfRange:
+ if (!InlineObjectCopy) return NULL;
+ // These also use the arraycopy intrinsic mechanism:
+ if (!InlineArrayCopy) return NULL;
+ break;
+ case vmIntrinsics::_checkIndex:
+ // We do not intrinsify this. The optimizer does fine with it.
+ return NULL;
+
+ case vmIntrinsics::_get_AtomicLong:
+ case vmIntrinsics::_attemptUpdate:
+ if (!InlineAtomicLong) return NULL;
+ break;
+
+ case vmIntrinsics::_Object_init:
+ case vmIntrinsics::_invoke:
+ // We do not intrinsify these; they are marked for other purposes.
+ return NULL;
+
+ case vmIntrinsics::_getCallerClass:
+ if (!UseNewReflection) return NULL;
+ if (!InlineReflectionGetCallerClass) return NULL;
+ if (!JDK_Version::is_gte_jdk14x_version()) return NULL;
+ break;
+
+ default:
+ break;
+ }
+
+ // -XX:-InlineClassNatives disables natives from the Class class.
+ // The flag applies to all reflective calls, notably Array.newArray
+ // (visible to Java programmers as Array.newInstance).
+ if (m->holder()->name() == ciSymbol::java_lang_Class() ||
+ m->holder()->name() == ciSymbol::java_lang_reflect_Array()) {
+ if (!InlineClassNatives) return NULL;
+ }
+
+ // -XX:-InlineThreadNatives disables natives from the Thread class.
+ if (m->holder()->name() == ciSymbol::java_lang_Thread()) {
+ if (!InlineThreadNatives) return NULL;
+ }
+
+ // -XX:-InlineMathNatives disables natives from the Math, Float, and Double classes.
+ if (m->holder()->name() == ciSymbol::java_lang_Math() ||
+ m->holder()->name() == ciSymbol::java_lang_Float() ||
+ m->holder()->name() == ciSymbol::java_lang_Double()) {
+ if (!InlineMathNatives) return NULL;
+ }
+
+ // -XX:-InlineUnsafeOps disables natives from the Unsafe class.
+ if (m->holder()->name() == ciSymbol::sun_misc_Unsafe()) {
+ if (!InlineUnsafeOps) return NULL;
+ }
+
+ return new LibraryIntrinsic(m, is_virtual, (vmIntrinsics::ID) id);
+}
+
+//----------------------register_library_intrinsics-----------------------
+// Initialize this file's data structures, for each Compile instance.
+void Compile::register_library_intrinsics() {
+ // Nothing to do here.
+}
+
+JVMState* LibraryIntrinsic::generate(JVMState* jvms) {
+ LibraryCallKit kit(jvms, this);
+ Compile* C = kit.C;
+ int nodes = C->unique();
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) && Verbose) {
+ char buf[1000];
+ const char* str = vmIntrinsics::short_name_as_C_string(intrinsic_id(), buf, sizeof(buf));
+ tty->print_cr("Intrinsic %s", str);
+ }
+#endif
+ if (kit.try_to_inline()) {
+ if (PrintIntrinsics || PrintInlining NOT_PRODUCT( || PrintOptoInlining) ) {
+ tty->print("Inlining intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ kit.caller()->print_short_name(tty);
+ tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_worked);
+ if (C->log()) {
+ C->log()->elem("intrinsic id='%s'%s nodes='%d'",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " virtual='1'" : ""),
+ C->unique() - nodes);
+ }
+ return kit.transfer_exceptions_into_jvms();
+ }
+
+ if (PrintIntrinsics) {
+ switch (intrinsic_id()) {
+ case vmIntrinsics::_invoke:
+ case vmIntrinsics::_Object_init:
+ // We do not expect to inline these, so do not produce any noise about them.
+ break;
+ default:
+ tty->print("Did not inline intrinsic %s%s at bci:%d in",
+ vmIntrinsics::name_at(intrinsic_id()),
+ (is_virtual() ? " (virtual)" : ""), kit.bci());
+ kit.caller()->print_short_name(tty);
+ tty->print_cr(" (%d bytes)", kit.caller()->code_size());
+ }
+ }
+ C->gather_intrinsic_statistics(intrinsic_id(), is_virtual(), Compile::_intrinsic_failed);
+ return NULL;
+}
+
+bool LibraryCallKit::try_to_inline() {
+ // Handle symbolic names for otherwise undistinguished boolean switches:
+ const bool is_store = true;
+ const bool is_native_ptr = true;
+ const bool is_static = true;
+
+ switch (intrinsic_id()) {
+ case vmIntrinsics::_hashCode:
+ return inline_native_hashcode(intrinsic()->is_virtual(), !is_static);
+ case vmIntrinsics::_identityHashCode:
+ return inline_native_hashcode(/*!virtual*/ false, is_static);
+ case vmIntrinsics::_getClass:
+ return inline_native_getClass();
+
+ case vmIntrinsics::_dsin:
+ case vmIntrinsics::_dcos:
+ case vmIntrinsics::_dtan:
+ case vmIntrinsics::_dabs:
+ case vmIntrinsics::_datan2:
+ case vmIntrinsics::_dsqrt:
+ case vmIntrinsics::_dexp:
+ case vmIntrinsics::_dlog:
+ case vmIntrinsics::_dlog10:
+ case vmIntrinsics::_dpow:
+ return inline_math_native(intrinsic_id());
+
+ case vmIntrinsics::_min:
+ case vmIntrinsics::_max:
+ return inline_min_max(intrinsic_id());
+
+ case vmIntrinsics::_arraycopy:
+ return inline_arraycopy();
+
+ case vmIntrinsics::_compareTo:
+ return inline_string_compareTo();
+ case vmIntrinsics::_indexOf:
+ return inline_string_indexOf();
+
+ case vmIntrinsics::_getObject:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, false);
+ case vmIntrinsics::_getBoolean:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, false);
+ case vmIntrinsics::_getByte:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, false);
+ case vmIntrinsics::_getShort:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, false);
+ case vmIntrinsics::_getChar:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, false);
+ case vmIntrinsics::_getInt:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, false);
+ case vmIntrinsics::_getLong:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, false);
+ case vmIntrinsics::_getFloat:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, false);
+ case vmIntrinsics::_getDouble:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, false);
+
+ case vmIntrinsics::_putObject:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, false);
+ case vmIntrinsics::_putBoolean:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, false);
+ case vmIntrinsics::_putByte:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, false);
+ case vmIntrinsics::_putShort:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, false);
+ case vmIntrinsics::_putChar:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, false);
+ case vmIntrinsics::_putInt:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_INT, false);
+ case vmIntrinsics::_putLong:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, false);
+ case vmIntrinsics::_putFloat:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, false);
+ case vmIntrinsics::_putDouble:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, false);
+
+ case vmIntrinsics::_getByte_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_BYTE, false);
+ case vmIntrinsics::_getShort_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_SHORT, false);
+ case vmIntrinsics::_getChar_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_CHAR, false);
+ case vmIntrinsics::_getInt_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_INT, false);
+ case vmIntrinsics::_getLong_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_LONG, false);
+ case vmIntrinsics::_getFloat_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_FLOAT, false);
+ case vmIntrinsics::_getDouble_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_DOUBLE, false);
+ case vmIntrinsics::_getAddress_raw:
+ return inline_unsafe_access(is_native_ptr, !is_store, T_ADDRESS, false);
+
+ case vmIntrinsics::_putByte_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_BYTE, false);
+ case vmIntrinsics::_putShort_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_SHORT, false);
+ case vmIntrinsics::_putChar_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_CHAR, false);
+ case vmIntrinsics::_putInt_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_INT, false);
+ case vmIntrinsics::_putLong_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_LONG, false);
+ case vmIntrinsics::_putFloat_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_FLOAT, false);
+ case vmIntrinsics::_putDouble_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_DOUBLE, false);
+ case vmIntrinsics::_putAddress_raw:
+ return inline_unsafe_access(is_native_ptr, is_store, T_ADDRESS, false);
+
+ case vmIntrinsics::_getObjectVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_OBJECT, true);
+ case vmIntrinsics::_getBooleanVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BOOLEAN, true);
+ case vmIntrinsics::_getByteVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_BYTE, true);
+ case vmIntrinsics::_getShortVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_SHORT, true);
+ case vmIntrinsics::_getCharVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_CHAR, true);
+ case vmIntrinsics::_getIntVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_INT, true);
+ case vmIntrinsics::_getLongVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_LONG, true);
+ case vmIntrinsics::_getFloatVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_FLOAT, true);
+ case vmIntrinsics::_getDoubleVolatile:
+ return inline_unsafe_access(!is_native_ptr, !is_store, T_DOUBLE, true);
+
+ case vmIntrinsics::_putObjectVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_OBJECT, true);
+ case vmIntrinsics::_putBooleanVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BOOLEAN, true);
+ case vmIntrinsics::_putByteVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_BYTE, true);
+ case vmIntrinsics::_putShortVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_SHORT, true);
+ case vmIntrinsics::_putCharVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_CHAR, true);
+ case vmIntrinsics::_putIntVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_INT, true);
+ case vmIntrinsics::_putLongVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_LONG, true);
+ case vmIntrinsics::_putFloatVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_FLOAT, true);
+ case vmIntrinsics::_putDoubleVolatile:
+ return inline_unsafe_access(!is_native_ptr, is_store, T_DOUBLE, true);
+
+ case vmIntrinsics::_prefetchRead:
+ return inline_unsafe_prefetch(!is_native_ptr, !is_store, !is_static);
+ case vmIntrinsics::_prefetchWrite:
+ return inline_unsafe_prefetch(!is_native_ptr, is_store, !is_static);
+ case vmIntrinsics::_prefetchReadStatic:
+ return inline_unsafe_prefetch(!is_native_ptr, !is_store, is_static);
+ case vmIntrinsics::_prefetchWriteStatic:
+ return inline_unsafe_prefetch(!is_native_ptr, is_store, is_static);
+
+ case vmIntrinsics::_compareAndSwapObject:
+ return inline_unsafe_CAS(T_OBJECT);
+ case vmIntrinsics::_compareAndSwapInt:
+ return inline_unsafe_CAS(T_INT);
+ case vmIntrinsics::_compareAndSwapLong:
+ return inline_unsafe_CAS(T_LONG);
+
+ case vmIntrinsics::_putOrderedObject:
+ return inline_unsafe_ordered_store(T_OBJECT);
+ case vmIntrinsics::_putOrderedInt:
+ return inline_unsafe_ordered_store(T_INT);
+ case vmIntrinsics::_putOrderedLong:
+ return inline_unsafe_ordered_store(T_LONG);
+
+ case vmIntrinsics::_currentThread:
+ return inline_native_currentThread();
+ case vmIntrinsics::_isInterrupted:
+ return inline_native_isInterrupted();
+
+ case vmIntrinsics::_currentTimeMillis:
+ return inline_native_time_funcs(false);
+ case vmIntrinsics::_nanoTime:
+ return inline_native_time_funcs(true);
+ case vmIntrinsics::_allocateInstance:
+ return inline_unsafe_allocate();
+ case vmIntrinsics::_copyMemory:
+ return inline_unsafe_copyMemory();
+ case vmIntrinsics::_newArray:
+ return inline_native_newArray();
+ case vmIntrinsics::_getLength:
+ return inline_native_getLength();
+ case vmIntrinsics::_copyOf:
+ return inline_array_copyOf(false);
+ case vmIntrinsics::_copyOfRange:
+ return inline_array_copyOf(true);
+ case vmIntrinsics::_clone:
+ return inline_native_clone(intrinsic()->is_virtual());
+
+ case vmIntrinsics::_isAssignableFrom:
+ return inline_native_subtype_check();
+
+ case vmIntrinsics::_isInstance:
+ case vmIntrinsics::_getModifiers:
+ case vmIntrinsics::_isInterface:
+ case vmIntrinsics::_isArray:
+ case vmIntrinsics::_isPrimitive:
+ case vmIntrinsics::_getSuperclass:
+ case vmIntrinsics::_getComponentType:
+ case vmIntrinsics::_getClassAccessFlags:
+ return inline_native_Class_query(intrinsic_id());
+
+ case vmIntrinsics::_floatToRawIntBits:
+ case vmIntrinsics::_floatToIntBits:
+ case vmIntrinsics::_intBitsToFloat:
+ case vmIntrinsics::_doubleToRawLongBits:
+ case vmIntrinsics::_doubleToLongBits:
+ case vmIntrinsics::_longBitsToDouble:
+ return inline_fp_conversions(intrinsic_id());
+
+ case vmIntrinsics::_reverseBytes_i:
+ case vmIntrinsics::_reverseBytes_l:
+ return inline_reverseBytes((vmIntrinsics::ID) intrinsic_id());
+
+ case vmIntrinsics::_get_AtomicLong:
+ return inline_native_AtomicLong_get();
+ case vmIntrinsics::_attemptUpdate:
+ return inline_native_AtomicLong_attemptUpdate();
+
+ case vmIntrinsics::_getCallerClass:
+ return inline_native_Reflection_getCallerClass();
+
+ default:
+ // If you get here, it may be that someone has added a new intrinsic
+ // to the list in vmSymbols.hpp without implementing it here.
+#ifndef PRODUCT
+ if ((PrintMiscellaneous && (Verbose || WizardMode)) || PrintOpto) {
+ tty->print_cr("*** Warning: Unimplemented intrinsic %s(%d)",
+ vmIntrinsics::name_at(intrinsic_id()), intrinsic_id());
+ }
+#endif
+ return false;
+ }
+}
+
+//------------------------------push_result------------------------------
+// Helper function for finishing intrinsics.
+void LibraryCallKit::push_result(RegionNode* region, PhiNode* value) {
+ record_for_igvn(region);
+ set_control(_gvn.transform(region));
+ BasicType value_type = value->type()->basic_type();
+ push_node(value_type, _gvn.transform(value));
+}
+
+//------------------------------generate_guard---------------------------
+// Helper function for generating guarded fast-slow graph structures.
+// The given 'test', if true, guards a slow path. If the test fails
+// then a fast path can be taken. (We generally hope it fails.)
+// In all cases, GraphKit::control() is updated to the fast path.
+// The returned value represents the control for the slow path.
+// The return value is never 'top'; it is either a valid control
+// or NULL if it is obvious that the slow path can never be taken.
+// Also, if region and the slow control are not NULL, the slow edge
+// is appended to the region.
+Node* LibraryCallKit::generate_guard(Node* test, RegionNode* region, float true_prob) {
+ if (stopped()) {
+ // Already short circuited.
+ return NULL;
+ }
+
+ // Build an if node and its projections.
+ // If test is true we take the slow path, which we assume is uncommon.
+ if (_gvn.type(test) == TypeInt::ZERO) {
+ // The slow branch is never taken. No need to build this guard.
+ return NULL;
+ }
+
+ IfNode* iff = create_and_map_if(control(), test, true_prob, COUNT_UNKNOWN);
+
+ Node* if_slow = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+ if (if_slow == top()) {
+ // The slow branch is never taken. No need to build this guard.
+ return NULL;
+ }
+
+ if (region != NULL)
+ region->add_req(if_slow);
+
+ Node* if_fast = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control(if_fast);
+
+ return if_slow;
+}
+
+inline Node* LibraryCallKit::generate_slow_guard(Node* test, RegionNode* region) {
+ return generate_guard(test, region, PROB_UNLIKELY_MAG(3));
+}
+inline Node* LibraryCallKit::generate_fair_guard(Node* test, RegionNode* region) {
+ return generate_guard(test, region, PROB_FAIR);
+}
+
+inline Node* LibraryCallKit::generate_negative_guard(Node* index, RegionNode* region,
+ Node* *pos_index) {
+ if (stopped())
+ return NULL; // already stopped
+ if (_gvn.type(index)->higher_equal(TypeInt::POS)) // [0,maxint]
+ return NULL; // index is already adequately typed
+ Node* cmp_lt = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
+ Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* is_neg = generate_guard(bol_lt, region, PROB_MIN);
+ if (is_neg != NULL && pos_index != NULL) {
+ // Emulate effect of Parse::adjust_map_after_if.
+ Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS);
+ ccast->set_req(0, control());
+ (*pos_index) = _gvn.transform(ccast);
+ }
+ return is_neg;
+}
+
+inline Node* LibraryCallKit::generate_nonpositive_guard(Node* index, bool never_negative,
+ Node* *pos_index) {
+ if (stopped())
+ return NULL; // already stopped
+ if (_gvn.type(index)->higher_equal(TypeInt::POS1)) // [1,maxint]
+ return NULL; // index is already adequately typed
+ Node* cmp_le = _gvn.transform( new (C, 3) CmpINode(index, intcon(0)) );
+ BoolTest::mask le_or_eq = (never_negative ? BoolTest::eq : BoolTest::le);
+ Node* bol_le = _gvn.transform( new (C, 2) BoolNode(cmp_le, le_or_eq) );
+ Node* is_notp = generate_guard(bol_le, NULL, PROB_MIN);
+ if (is_notp != NULL && pos_index != NULL) {
+ // Emulate effect of Parse::adjust_map_after_if.
+ Node* ccast = new (C, 2) CastIINode(index, TypeInt::POS1);
+ ccast->set_req(0, control());
+ (*pos_index) = _gvn.transform(ccast);
+ }
+ return is_notp;
+}
+
+// Make sure that 'position' is a valid limit index, in [0..length].
+// There are two equivalent plans for checking this:
+// A. (offset + copyLength) unsigned<= arrayLength
+// B. offset <= (arrayLength - copyLength)
+// We require that all of the values above, except for the sum and
+// difference, are already known to be non-negative.
+// Plan A is robust in the face of overflow, if offset and copyLength
+// are both hugely positive.
+//
+// Plan B is less direct and intuitive, but it does not overflow at
+// all, since the difference of two non-negatives is always
+// representable. Whenever Java methods must perform the equivalent
+// check they generally use Plan B instead of Plan A.
+// For the moment we use Plan A.
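+// As a concrete illustration of Plan A's robustness: with 32-bit ints,
+// offset = 0x70000000 and copyLength = 0x20000000 produce a negative
+// signed sum, but the unsigned comparison still sees a value larger than
+// any possible arrayLength, so the limit check correctly rejects it.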
+inline Node* LibraryCallKit::generate_limit_guard(Node* offset,
+ Node* subseq_length,
+ Node* array_length,
+ RegionNode* region) {
+ if (stopped())
+ return NULL; // already stopped
+ bool zero_offset = _gvn.type(offset) == TypeInt::ZERO;
+ if (zero_offset && _gvn.eqv_uncast(subseq_length, array_length))
+ return NULL; // common case of whole-array copy
+ Node* last = subseq_length;
+ if (!zero_offset) // last += offset
+ last = _gvn.transform( new (C, 3) AddINode(last, offset));
+ Node* cmp_lt = _gvn.transform( new (C, 3) CmpUNode(array_length, last) );
+ Node* bol_lt = _gvn.transform( new (C, 2) BoolNode(cmp_lt, BoolTest::lt) );
+ Node* is_over = generate_guard(bol_lt, region, PROB_MIN);
+ return is_over;
+}
+
+
+//--------------------------generate_current_thread--------------------
+Node* LibraryCallKit::generate_current_thread(Node* &tls_output) {
+ ciKlass* thread_klass = env()->Thread_klass();
+ const Type* thread_type = TypeOopPtr::make_from_klass(thread_klass)->cast_to_ptr_type(TypePtr::NotNull);
+ Node* thread = _gvn.transform(new (C, 1) ThreadLocalNode());
+ Node* p = basic_plus_adr(top()/*!oop*/, thread, in_bytes(JavaThread::threadObj_offset()));
+ Node* threadObj = make_load(NULL, p, thread_type, T_OBJECT);
+ tls_output = thread;
+ return threadObj;
+}
+
+
+//------------------------------inline_string_compareTo------------------------
+bool LibraryCallKit::inline_string_compareTo() {
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ _sp += 2;
+ Node *argument = pop(); // pop non-receiver first: it was pushed second
+ Node *receiver = pop();
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when string compare is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += 2;
+ receiver = do_null_check(receiver, T_OBJECT);
+ argument = do_null_check(argument, T_OBJECT);
+ _sp -= 2;
+ if (stopped()) {
+ return true;
+ }
+
+ ciInstanceKlass* klass = env()->String_klass();
+ const TypeInstPtr* string_type =
+ TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
+
+ Node* compare =
+ _gvn.transform(new (C, 7) StrCompNode(
+ control(),
+ memory(TypeAryPtr::CHARS),
+ memory(string_type->add_offset(value_offset)),
+ memory(string_type->add_offset(count_offset)),
+ memory(string_type->add_offset(offset_offset)),
+ receiver,
+ argument));
+ push(compare);
+ return true;
+}
+
+// Java version of String.indexOf(constant string)
+// class StringDecl {
+// StringDecl(char[] ca) {
+// offset = 0;
+// count = ca.length;
+// value = ca;
+// }
+// int offset;
+// int count;
+// char[] value;
+// }
+//
+// static int string_indexOf_J(StringDecl string_object, char[] target_object,
+// int targetOffset, int cache_i, int md2) {
+// int cache = cache_i;
+// int sourceOffset = string_object.offset;
+// int sourceCount = string_object.count;
+// int targetCount = target_object.length;
+//
+// int targetCountLess1 = targetCount - 1;
+// int sourceEnd = sourceOffset + sourceCount - targetCountLess1;
+//
+// char[] source = string_object.value;
+// char[] target = target_object;
+// int lastChar = target[targetCountLess1];
+//
+// outer_loop:
+// for (int i = sourceOffset; i < sourceEnd; ) {
+// int src = source[i + targetCountLess1];
+// if (src == lastChar) {
+// // With random strings and a 4-character alphabet,
+// // reverse matching at this point sets up 0.8% fewer
+// // frames, but (paradoxically) makes 0.3% more probes.
+// // Since those probes are nearer the lastChar probe,
+// there may be a net D$ win with reverse matching.
+// But reversing the loop inhibits unrolling of the inner loop
+// for an unknown reason. So does running the outer loop from
+// // (sourceOffset - targetCountLess1) to (sourceOffset + sourceCount)
+// for (int j = 0; j < targetCountLess1; j++) {
+// if (target[targetOffset + j] != source[i+j]) {
+// if ((cache & (1 << source[i+j])) == 0) {
+// if (md2 < j+1) {
+// i += j+1;
+// continue outer_loop;
+// }
+// }
+// i += md2;
+// continue outer_loop;
+// }
+// }
+// return i - sourceOffset;
+// }
+// if ((cache & (1 << src)) == 0) {
+// i += targetCountLess1;
+// } // using "i += targetCount;" and an "else i++;" causes a jump to jump.
+// i++;
+// }
+// return -1;
+// }
+
+//------------------------------string_indexOf------------------------
+Node* LibraryCallKit::string_indexOf(Node* string_object, ciTypeArray* target_array, jint targetOffset_i,
+ jint cache_i, jint md2_i) {
+
+ Node* no_ctrl = NULL;
+ float likely = PROB_LIKELY(0.9);
+ float unlikely = PROB_UNLIKELY(0.9);
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ ciInstanceKlass* klass = env()->String_klass();
+ const TypeInstPtr* string_type = TypeInstPtr::make(TypePtr::BotPTR, klass, false, NULL, 0);
+ const TypeAryPtr* source_type = TypeAryPtr::make(TypePtr::NotNull, TypeAry::make(TypeInt::CHAR,TypeInt::POS), ciTypeArrayKlass::make(T_CHAR), true, 0);
+
+ Node* sourceOffseta = basic_plus_adr(string_object, string_object, offset_offset);
+ Node* sourceOffset = make_load(no_ctrl, sourceOffseta, TypeInt::INT, T_INT, string_type->add_offset(offset_offset));
+ Node* sourceCounta = basic_plus_adr(string_object, string_object, count_offset);
+ Node* sourceCount = make_load(no_ctrl, sourceCounta, TypeInt::INT, T_INT, string_type->add_offset(count_offset));
+ Node* sourcea = basic_plus_adr(string_object, string_object, value_offset);
+ Node* source = make_load(no_ctrl, sourcea, source_type, T_OBJECT, string_type->add_offset(value_offset));
+
+ Node* target = _gvn.transform(ConPNode::make(C, target_array));
+ jint target_length = target_array->length();
+ const TypeAry* target_array_type = TypeAry::make(TypeInt::CHAR, TypeInt::make(0, target_length, Type::WidenMin));
+ const TypeAryPtr* target_type = TypeAryPtr::make(TypePtr::BotPTR, target_array_type, target_array->klass(), true, Type::OffsetBot);
+
+ IdealKit kit(gvn(), control(), merged_memory());
+#define __ kit.
+ Node* zero = __ ConI(0);
+ Node* one = __ ConI(1);
+ Node* cache = __ ConI(cache_i);
+ Node* md2 = __ ConI(md2_i);
+ Node* lastChar = __ ConI(target_array->char_at(target_length - 1));
+ Node* targetCount = __ ConI(target_length);
+ Node* targetCountLess1 = __ ConI(target_length - 1);
+ Node* targetOffset = __ ConI(targetOffset_i);
+ Node* sourceEnd = __ SubI(__ AddI(sourceOffset, sourceCount), targetCountLess1);
+
+ IdealVariable rtn(kit), i(kit), j(kit); __ declares_done();
+ Node* outer_loop = __ make_label(2 /* goto */);
+ Node* return_ = __ make_label(1);
+
+ __ set(rtn,__ ConI(-1));
+ __ loop(i, sourceOffset, BoolTest::lt, sourceEnd); {
+ Node* i2 = __ AddI(__ value(i), targetCountLess1);
+ // pin to prohibit loading of "next iteration" value which may SEGV (rare)
+ Node* src = load_array_element(__ ctrl(), source, i2, TypeAryPtr::CHARS);
+ __ if_then(src, BoolTest::eq, lastChar, unlikely); {
+ __ loop(j, zero, BoolTest::lt, targetCountLess1); {
+ Node* tpj = __ AddI(targetOffset, __ value(j));
+ Node* targ = load_array_element(no_ctrl, target, tpj, target_type);
+ Node* ipj = __ AddI(__ value(i), __ value(j));
+ Node* src2 = load_array_element(no_ctrl, source, ipj, TypeAryPtr::CHARS);
+ __ if_then(targ, BoolTest::ne, src2); {
+ __ if_then(__ AndI(cache, __ LShiftI(one, src2)), BoolTest::eq, zero); {
+ __ if_then(md2, BoolTest::lt, __ AddI(__ value(j), one)); {
+ __ increment(i, __ AddI(__ value(j), one));
+ __ goto_(outer_loop);
+ } __ end_if(); __ dead(j);
+ }__ end_if(); __ dead(j);
+ __ increment(i, md2);
+ __ goto_(outer_loop);
+ }__ end_if();
+ __ increment(j, one);
+ }__ end_loop(); __ dead(j);
+ __ set(rtn, __ SubI(__ value(i), sourceOffset)); __ dead(i);
+ __ goto_(return_);
+ }__ end_if();
+ __ if_then(__ AndI(cache, __ LShiftI(one, src)), BoolTest::eq, zero, likely); {
+ __ increment(i, targetCountLess1);
+ }__ end_if();
+ __ increment(i, one);
+ __ bind(outer_loop);
+ }__ end_loop(); __ dead(i);
+ __ bind(return_);
+ __ drain_delay_transform();
+
+ set_control(__ ctrl());
+ Node* result = __ value(rtn);
+#undef __
+ C->set_has_loops(true);
+ return result;
+}
+
+
+//------------------------------inline_string_indexOf------------------------
+bool LibraryCallKit::inline_string_indexOf() {
+
+ _sp += 2;
+ Node *argument = pop(); // pop non-receiver first: it was pushed second
+ Node *receiver = pop();
+
+ // Don't intrinsify if the argument isn't a constant string.
+ if (!argument->is_Con()) {
+ return false;
+ }
+ const TypeOopPtr* str_type = _gvn.type(argument)->isa_oopptr();
+ if (str_type == NULL) {
+ return false;
+ }
+ ciInstanceKlass* klass = env()->String_klass();
+ ciObject* str_const = str_type->const_oop();
+ if (str_const == NULL || str_const->klass() != klass) {
+ return false;
+ }
+ ciInstance* str = str_const->as_instance();
+ assert(str != NULL, "must be instance");
+
+ const int value_offset = java_lang_String::value_offset_in_bytes();
+ const int count_offset = java_lang_String::count_offset_in_bytes();
+ const int offset_offset = java_lang_String::offset_offset_in_bytes();
+
+ ciObject* v = str->field_value_by_offset(value_offset).as_object();
+ int o = str->field_value_by_offset(offset_offset).as_int();
+ int c = str->field_value_by_offset(count_offset).as_int();
+ ciTypeArray* pat = v->as_type_array(); // pattern (argument) character array
+
+ // Constant strings have no offset and count == length, which
+ // simplifies the resulting code somewhat, so let's optimize for that.
+ if (o != 0 || c != pat->length()) {
+ return false;
+ }
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when string compare is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += 2;
+ receiver = do_null_check(receiver, T_OBJECT);
+ // No null check on the argument is needed since it's a constant String oop.
+ _sp -= 2;
+ if (stopped()) {
+ return true;
+ }
+
+ // An empty pattern string always returns 0 (match at beginning of string)
+ if (c == 0) {
+ push(intcon(0));
+ return true;
+ }
+
+ jchar lastChar = pat->char_at(o + (c - 1));
+ int cache = 0;
+ int i;
+ for (i = 0; i < c - 1; i++) {
+ assert(i < pat->length(), "out of range");
+ cache |= (1 << (pat->char_at(o + i) & (sizeof(cache) * BitsPerByte - 1)));
+ }
+
+ int md2 = c;
+ for (i = 0; i < c - 1; i++) {
+ assert(i < pat->length(), "out of range");
+ if (pat->char_at(o + i) == lastChar) {
+ md2 = (c - 1) - i;
+ }
+ }
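+ // For example, with the constant pattern "abcab" (c == 5, lastChar == 'b'),
+ // the cache gets bits set for 'a', 'b' and 'c' (each char taken mod 32),
+ // and the last earlier occurrence of 'b' is at index 1, so md2 == 4 - 1 == 3,
+ // the distance the search may skip ahead after a partial match fails.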
+
+ Node* result = string_indexOf(receiver, pat, o, cache, md2);
+ push(result);
+ return true;
+}
+
+//--------------------------pop_math_arg--------------------------------
+// Pop a double argument to a math function from the stack
+// rounding it if necessary.
+Node * LibraryCallKit::pop_math_arg() {
+ Node *arg = pop_pair();
+ if( Matcher::strict_fp_requires_explicit_rounding && UseSSE<=1 )
+ arg = _gvn.transform( new (C, 2) RoundDoubleNode(0, arg) );
+ return arg;
+}
+
+//------------------------------inline_trig----------------------------------
+// Inline sin/cos/tan instructions, if possible. If rounding is required, do
+// argument reduction which will turn into a fast/slow diamond.
+bool LibraryCallKit::inline_trig(vmIntrinsics::ID id) {
+ _sp += arg_size(); // restore stack pointer
+ Node* arg = pop_math_arg();
+ Node* trig = NULL;
+
+ switch (id) {
+ case vmIntrinsics::_dsin:
+ trig = _gvn.transform((Node*)new (C, 2) SinDNode(arg));
+ break;
+ case vmIntrinsics::_dcos:
+ trig = _gvn.transform((Node*)new (C, 2) CosDNode(arg));
+ break;
+ case vmIntrinsics::_dtan:
+ trig = _gvn.transform((Node*)new (C, 2) TanDNode(arg));
+ break;
+ default:
+ assert(false, "bad intrinsic was passed in");
+ return false;
+ }
+
+ // Rounding required? Check for argument reduction!
+ if( Matcher::strict_fp_requires_explicit_rounding ) {
+
+ static const double pi_4 = 0.7853981633974483;
+ static const double neg_pi_4 = -0.7853981633974483;
+ // pi/2 in 80-bit extended precision
+ // static const unsigned char pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0x3f,0x00,0x00,0x00,0x00,0x00,0x00};
+ // -pi/2 in 80-bit extended precision
+ // static const unsigned char neg_pi_2_bits_x[] = {0x35,0xc2,0x68,0x21,0xa2,0xda,0x0f,0xc9,0xff,0xbf,0x00,0x00,0x00,0x00,0x00,0x00};
+ // Cutoff value for using this argument reduction technique
+ //static const double pi_2_minus_epsilon = 1.564660403643354;
+ //static const double neg_pi_2_plus_epsilon = -1.564660403643354;
+
+ // Pseudocode for sin:
+ // if (x <= Math.PI / 4.0) {
+ // if (x >= -Math.PI / 4.0) return fsin(x);
+ // if (x >= -Math.PI / 2.0) return -fcos(x + Math.PI / 2.0);
+ // } else {
+ // if (x <= Math.PI / 2.0) return fcos(x - Math.PI / 2.0);
+ // }
+ // return StrictMath.sin(x);
+
+ // Pseudocode for cos:
+ // if (x <= Math.PI / 4.0) {
+ // if (x >= -Math.PI / 4.0) return fcos(x);
+ // if (x >= -Math.PI / 2.0) return fsin(x + Math.PI / 2.0);
+ // } else {
+ // if (x <= Math.PI / 2.0) return -fsin(x - Math.PI / 2.0);
+ // }
+ // return StrictMath.cos(x);
+
+ // Actually, sticking an 80-bit Intel value into C2 will be tough; it
+ // requires a special machine instruction to load it. Instead we'll try
+ // the 'easy' case. If we really need the extra range +/- PI/2 we'll
+ // probably do the math inside the SIN encoding.
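+ // So the diamond built below only distinguishes |arg| <= PI/4, where the
+ // hardware result already computed above is used, from |arg| > PI/4,
+ // which falls back to the SharedRuntime leaf call.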
+
+ // Make the merge point
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r,Type::DOUBLE);
+
+ // Flatten arg so we need only 1 test
+ Node *abs = _gvn.transform(new (C, 2) AbsDNode(arg));
+ // Node for PI/4 constant
+ Node *pi4 = makecon(TypeD::make(pi_4));
+ // Check PI/4 : abs(arg)
+ Node *cmp = _gvn.transform(new (C, 3) CmpDNode(pi4,abs));
+ // Check: If PI/4 < abs(arg) then go slow
+ Node *bol = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::lt ) );
+ // Branch either way
+ IfNode *iff = create_and_xform_if(control(),bol, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ set_control(opt_iff(r,iff));
+
+ // Set fast path result
+ phi->init_req(2,trig);
+
+ // Slow path - non-blocking leaf call
+ Node* call = NULL;
+ switch (id) {
+ case vmIntrinsics::_dsin:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dsin),
+ "Sin", NULL, arg, top());
+ break;
+ case vmIntrinsics::_dcos:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dcos),
+ "Cos", NULL, arg, top());
+ break;
+ case vmIntrinsics::_dtan:
+ call = make_runtime_call(RC_LEAF, OptoRuntime::Math_D_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtan),
+ "Tan", NULL, arg, top());
+ break;
+ }
+ assert(control()->in(0) == call, "");
+ Node* slow_result = _gvn.transform(new (C, 1) ProjNode(call,TypeFunc::Parms));
+ r->init_req(1,control());
+ phi->init_req(1,slow_result);
+
+ // Post-merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+ trig = _gvn.transform(phi);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+ }
+ // Push result back on JVM stack
+ push_pair(trig);
+ return true;
+}
+
+//------------------------------inline_sqrt-------------------------------------
+// Inline square root instruction, if possible.
+bool LibraryCallKit::inline_sqrt(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dsqrt, "Not square root");
+ _sp += arg_size(); // restore stack pointer
+ push_pair(_gvn.transform(new (C, 2) SqrtDNode(0, pop_math_arg())));
+ return true;
+}
+
+//------------------------------inline_abs-------------------------------------
+// Inline absolute value instruction, if possible.
+bool LibraryCallKit::inline_abs(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dabs, "Not absolute value");
+ _sp += arg_size(); // restore stack pointer
+ push_pair(_gvn.transform(new (C, 2) AbsDNode(pop_math_arg())));
+ return true;
+}
+
+//------------------------------inline_exp-------------------------------------
+// Inline exp instructions, if possible. The Intel hardware only misses
+// really odd corner cases (+/- Infinity). Just uncommon-trap them.
+bool LibraryCallKit::inline_exp(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dexp, "Not exp");
+
+ // If this inlining ever returned NaN in the past, we do not intrinsify it
+ // ever again. NaN results require StrictMath.exp handling.
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Do not intrinsify on older platforms which lack cmove.
+ if (ConditionalMoveLimit == 0) return false;
+
+ _sp += arg_size(); // restore stack pointer
+ Node *x = pop_math_arg();
+ Node *result = _gvn.transform(new (C, 2) ExpDNode(0,x));
+
+ //-------------------
+ //result=(result.isNaN())? StrictMath::exp():result;
+ // Check for NaN (result != result); if so, go to StrictMath
+ Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
+ // Build the boolean node
+ Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
+
+ { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
+ // End the current control-flow path
+ push_pair(x);
+ // Math.exp intrinsic returned a NaN, which requires StrictMath.exp
+ // to handle. Recompile without intrinsifying Math.exp
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_make_not_entrant);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ push_pair(result);
+
+ return true;
+}
+
+//------------------------------inline_pow-------------------------------------
+// Inline power instructions, if possible.
+bool LibraryCallKit::inline_pow(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_dpow, "Not pow");
+
+ // If this inlining ever returned NaN in the past, we do not intrinsify it
+ // ever again. NaN results require StrictMath.pow handling.
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Do not intrinsify on older platforms which lack cmove.
+ if (ConditionalMoveLimit == 0) return false;
+
+ // Pseudocode for pow
+ // if (x <= 0.0) {
+ // if ((double)((int)y)==y) { // if y is int
+ // result = ((1&(int)y)==0)?-DPow(abs(x), y):DPow(abs(x), y)
+ // } else {
+ // result = NaN;
+ // }
+ // } else {
+ // result = DPow(x,y);
+ // }
+ // if (result != result)? {
+ // uncommon_trap();
+ // }
+ // return result;
+
+ _sp += arg_size(); // restore stack pointer
+ Node* y = pop_math_arg();
+ Node* x = pop_math_arg();
+
+ Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
+
+ // Short form: if not top-level (i.e., Math.pow is being inlined inside
+ // some other method) then skip the fancy tests and just check for a
+ // NaN result.
+ Node *result = NULL;
+ if( jvms()->depth() >= 1 ) {
+ result = fast_result;
+ } else {
+
+ // Set the merge point for If node with condition of (x <= 0.0)
+ // There are four possible paths to region node and phi node
+ RegionNode *r = new (C, 4) RegionNode(4);
+ Node *phi = new (C, 4) PhiNode(r, Type::DOUBLE);
+
+ // Build the first if node: if (x <= 0.0)
+ // Node for 0 constant
+ Node *zeronode = makecon(TypeD::ZERO);
+ // Check x:0
+ Node *cmp = _gvn.transform(new (C, 3) CmpDNode(x, zeronode));
+ // Check: If (x<=0) then go complex path
+ Node *bol1 = _gvn.transform( new (C, 2) BoolNode( cmp, BoolTest::le ) );
+ // Branch either way
+ IfNode *if1 = create_and_xform_if(control(),bol1, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+ Node *opt_test = _gvn.transform(if1);
+ //assert( opt_test->is_If(), "Expect an IfNode");
+ IfNode *opt_if1 = (IfNode*)opt_test;
+ // Fast path taken; set region slot 3
+ Node *fast_taken = _gvn.transform( new (C, 1) IfFalseNode(opt_if1) );
+ r->init_req(3,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *complex_path = _gvn.transform( new (C, 1) IfTrueNode(opt_if1) );
+
+ // Set fast path result
+ Node *fast_result = _gvn.transform( new (C, 3) PowDNode(0, x, y) );
+ phi->init_req(3, fast_result);
+
+ // Complex path
+ // Build the second if node (if y is int)
+ // Node for (int)y
+ Node *inty = _gvn.transform( new (C, 2) ConvD2INode(y));
+ // Node for (double)((int) y)
+ Node *doubleinty= _gvn.transform( new (C, 2) ConvI2DNode(inty));
+ // Check (double)((int) y) : y
+ Node *cmpinty= _gvn.transform(new (C, 3) CmpDNode(doubleinty, y));
+ // Check if (y isn't int) then go to slow path
+
+ Node *bol2 = _gvn.transform( new (C, 2) BoolNode( cmpinty, BoolTest::ne ) );
+ // Branch either way
+ IfNode *if2 = create_and_xform_if(complex_path,bol2, PROB_STATIC_INFREQUENT, COUNT_UNKNOWN);
+ Node *slow_path = opt_iff(r,if2); // Set region path 2
+
+ // Calculate DPow(abs(x), y)*(1 & (int)y)
+ // Node for constant 1
+ Node *conone = intcon(1);
+ // 1& (int)y
+ Node *signnode= _gvn.transform( new (C, 3) AndINode(conone, inty) );
+ // zero node
+ Node *conzero = intcon(0);
+ // Check (1&(int)y)==0?
+ Node *cmpeq1 = _gvn.transform(new (C, 3) CmpINode(signnode, conzero));
+ // Check if (1&(int)y) != 0, i.e. y is odd; if so the result is negative
+ Node *bol3 = _gvn.transform( new (C, 2) BoolNode( cmpeq1, BoolTest::ne ) );
+ // abs(x)
+ Node *absx=_gvn.transform( new (C, 2) AbsDNode(x));
+ // abs(x)^y
+ Node *absxpowy = _gvn.transform( new (C, 3) PowDNode(0, absx, y) );
+ // -abs(x)^y
+ Node *negabsxpowy = _gvn.transform(new (C, 2) NegDNode (absxpowy));
+ // (1&(int)y)==1?-DPow(abs(x), y):DPow(abs(x), y)
+ Node *signresult = _gvn.transform( CMoveNode::make(C, NULL, bol3, absxpowy, negabsxpowy, Type::DOUBLE));
+ // Set complex path fast result
+ phi->init_req(2, signresult);
+
+ static const jlong nan_bits = CONST64(0x7ff8000000000000);
+ Node *slow_result = makecon(TypeD::make(*(double*)&nan_bits)); // return NaN
+ r->init_req(1,slow_path);
+ phi->init_req(1,slow_result);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+ result=_gvn.transform(phi);
+ }
+
+ //-------------------
+ //result=(result.isNaN())? uncommon_trap():result;
+ // Check for NaN (result != result); if so, go to StrictMath
+ Node* cmpisnan = _gvn.transform(new (C, 3) CmpDNode(result,result));
+ // Build the boolean node
+ Node* bolisnum = _gvn.transform( new (C, 2) BoolNode(cmpisnan, BoolTest::eq) );
+
+ { BuildCutout unless(this, bolisnum, PROB_STATIC_FREQUENT);
+ // End the current control-flow path
+ push_pair(x);
+ push_pair(y);
+ // Math.pow intrinsic returned a NaN, which requires StrictMath.pow
+ // to handle. Recompile without intrinsifying Math.pow.
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_make_not_entrant);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ push_pair(result);
+
+ return true;
+}
+
+//------------------------------inline_trans-------------------------------------
+// Inline transcendental instructions, if possible. The Intel hardware gets
+// these right, no funny corner cases missed.
+bool LibraryCallKit::inline_trans(vmIntrinsics::ID id) {
+ _sp += arg_size(); // restore stack pointer
+ Node* arg = pop_math_arg();
+ Node* trans = NULL;
+
+ switch (id) {
+ case vmIntrinsics::_dlog:
+ trans = _gvn.transform((Node*)new (C, 2) LogDNode(arg));
+ break;
+ case vmIntrinsics::_dlog10:
+ trans = _gvn.transform((Node*)new (C, 2) Log10DNode(arg));
+ break;
+ default:
+ assert(false, "bad intrinsic was passed in");
+ return false;
+ }
+
+ // Push result back on JVM stack
+ push_pair(trans);
+ return true;
+}
+
+//------------------------------runtime_math-----------------------------
+bool LibraryCallKit::runtime_math(const TypeFunc* call_type, address funcAddr, const char* funcName) {
+ Node* a = NULL;
+ Node* b = NULL;
+
+ assert(call_type == OptoRuntime::Math_DD_D_Type() || call_type == OptoRuntime::Math_D_D_Type(),
+ "must be (DD)D or (D)D type");
+
+ // Inputs
+ _sp += arg_size(); // restore stack pointer
+ if (call_type == OptoRuntime::Math_DD_D_Type()) {
+ b = pop_math_arg();
+ }
+ a = pop_math_arg();
+
+ const TypePtr* no_memory_effects = NULL;
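+ // Doubles occupy two parameter slots, so each value is passed with top()
+ // as the placeholder for its second half; for the one-argument (D)D case
+ // there is no b.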
+ Node* trig = make_runtime_call(RC_LEAF, call_type, funcAddr, funcName,
+ no_memory_effects,
+ a, top(), b, b ? top() : NULL);
+ Node* value = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+0));
+#ifdef ASSERT
+ Node* value_top = _gvn.transform(new (C, 1) ProjNode(trig, TypeFunc::Parms+1));
+ assert(value_top == top(), "second value must be top");
+#endif
+
+ push_pair(value);
+ return true;
+}
+
+//------------------------------inline_math_native-----------------------------
+bool LibraryCallKit::inline_math_native(vmIntrinsics::ID id) {
+ switch (id) {
+ // These intrinsics are not properly supported on all hardware
+ case vmIntrinsics::_dcos: return Matcher::has_match_rule(Op_CosD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dcos), "COS");
+ case vmIntrinsics::_dsin: return Matcher::has_match_rule(Op_SinD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dsin), "SIN");
+ case vmIntrinsics::_dtan: return Matcher::has_match_rule(Op_TanD) ? inline_trig(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dtan), "TAN");
+
+ case vmIntrinsics::_dlog: return Matcher::has_match_rule(Op_LogD) ? inline_trans(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog), "LOG");
+ case vmIntrinsics::_dlog10: return Matcher::has_match_rule(Op_Log10D) ? inline_trans(id) :
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dlog10), "LOG10");
+
+ // These intrinsics are supported on all hardware
+ case vmIntrinsics::_dsqrt: return Matcher::has_match_rule(Op_SqrtD) ? inline_sqrt(id) : false;
+ case vmIntrinsics::_dabs: return Matcher::has_match_rule(Op_AbsD) ? inline_abs(id) : false;
+
+ // These intrinsics don't work on X86. The ad implementation doesn't
+ // handle NaNs properly. Instead of returning infinity, the ad
+ // implementation returns a NaN on overflow. See bug: 6304089
+ // Once the ad implementations are fixed, change the code below
+ // to match the intrinsics above
+
+ case vmIntrinsics::_dexp: return
+ runtime_math(OptoRuntime::Math_D_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dexp), "EXP");
+ case vmIntrinsics::_dpow: return
+ runtime_math(OptoRuntime::Math_DD_D_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::dpow), "POW");
+
+ // These intrinsics are not yet correctly implemented
+ case vmIntrinsics::_datan2:
+ return false;
+
+ default:
+ ShouldNotReachHere();
+ return false;
+ }
+}
+
+static bool is_simple_name(Node* n) {
+ return (n->req() == 1 // constant
+ || (n->is_Type() && n->as_Type()->type()->singleton())
+ || n->is_Proj() // parameter or return value
+ || n->is_Phi() // local of some sort
+ );
+}
+
+//----------------------------inline_min_max-----------------------------------
+bool LibraryCallKit::inline_min_max(vmIntrinsics::ID id) {
+ push(generate_min_max(id, argument(0), argument(1)));
+
+ return true;
+}
+
+Node*
+LibraryCallKit::generate_min_max(vmIntrinsics::ID id, Node* x0, Node* y0) {
+ // These are the candidate return values:
+ Node* xvalue = x0;
+ Node* yvalue = y0;
+
+ if (xvalue == yvalue) {
+ return xvalue;
+ }
+
+ bool want_max = (id == vmIntrinsics::_max);
+
+ const TypeInt* txvalue = _gvn.type(xvalue)->isa_int();
+ const TypeInt* tyvalue = _gvn.type(yvalue)->isa_int();
+ if (txvalue == NULL || tyvalue == NULL) return top();
+ // This is not really necessary, but it is consistent with a
+ // hypothetical MaxINode::Value method:
+ int widen = MAX2(txvalue->_widen, tyvalue->_widen);
+
+ // %%% This folding logic should (ideally) be in a different place.
+ // Some of it should be inside IfNode, and there should be a more reliable
+ // transformation of ?: style patterns into cmoves. We also want
+ // more powerful optimizations around cmove and min/max.
+
+ // Try to find a dominating comparison of these guys.
+ // It can simplify the index computation for Arrays.copyOf
+ // and similar uses of System.arraycopy.
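+ // For instance, when code shaped like
+ //   if (x < y)  n = Math.min(x, y);
+ // reaches this point, the dominating x < y test proves the answer is
+ // simply x, and the min collapses with no CMove at all.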
+ // First, compute the normalized version of CmpI(x, y).
+ int cmp_op = Op_CmpI;
+ Node* xkey = xvalue;
+ Node* ykey = yvalue;
+ Node* ideal_cmpxy = _gvn.transform( new(C, 3) CmpINode(xkey, ykey) );
+ if (ideal_cmpxy->is_Cmp()) {
+ // E.g., if we have CmpI(length - offset, count),
+ // it might idealize to CmpI(length, count + offset)
+ cmp_op = ideal_cmpxy->Opcode();
+ xkey = ideal_cmpxy->in(1);
+ ykey = ideal_cmpxy->in(2);
+ }
+
+ // Start by locating any relevant comparisons.
+ Node* start_from = (xkey->outcnt() < ykey->outcnt()) ? xkey : ykey;
+ Node* cmpxy = NULL;
+ Node* cmpyx = NULL;
+ for (DUIterator_Fast kmax, k = start_from->fast_outs(kmax); k < kmax; k++) {
+ Node* cmp = start_from->fast_out(k);
+ if (cmp->outcnt() > 0 && // must have prior uses
+ cmp->in(0) == NULL && // must be context-independent
+ cmp->Opcode() == cmp_op) { // right kind of compare
+ if (cmp->in(1) == xkey && cmp->in(2) == ykey) cmpxy = cmp;
+ if (cmp->in(1) == ykey && cmp->in(2) == xkey) cmpyx = cmp;
+ }
+ }
+
+ const int NCMPS = 2;
+ Node* cmps[NCMPS] = { cmpxy, cmpyx };
+ int cmpn;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++) {
+ if (cmps[cmpn] != NULL) break; // find a result
+ }
+ if (cmpn < NCMPS) {
+ // Look for a dominating test that tells us the min and max.
+ int depth = 0; // Limit search depth for speed
+ Node* dom = control();
+ for (; dom != NULL; dom = IfNode::up_one_dom(dom, true)) {
+ if (++depth >= 100) break;
+ Node* ifproj = dom;
+ if (!ifproj->is_Proj()) continue;
+ Node* iff = ifproj->in(0);
+ if (!iff->is_If()) continue;
+ Node* bol = iff->in(1);
+ if (!bol->is_Bool()) continue;
+ Node* cmp = bol->in(1);
+ if (cmp == NULL) continue;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++)
+ if (cmps[cmpn] == cmp) break;
+ if (cmpn == NCMPS) continue;
+ BoolTest::mask btest = bol->as_Bool()->_test._test;
+ if (ifproj->is_IfFalse()) btest = BoolTest(btest).negate();
+ if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
+ // At this point, we know that 'x btest y' is true.
+ switch (btest) {
+ case BoolTest::eq:
+ // They are proven equal, so we can collapse the min/max.
+ // Either value is the answer. Choose the simpler.
+ if (is_simple_name(yvalue) && !is_simple_name(xvalue))
+ return yvalue;
+ return xvalue;
+ case BoolTest::lt: // x < y
+ case BoolTest::le: // x <= y
+ return (want_max ? yvalue : xvalue);
+ case BoolTest::gt: // x > y
+ case BoolTest::ge: // x >= y
+ return (want_max ? xvalue : yvalue);
+ }
+ }
+ }
+
+ // We failed to find a dominating test.
+ // Let's pick a test that might GVN with prior tests.
+ Node* best_bol = NULL;
+ BoolTest::mask best_btest = BoolTest::illegal;
+ for (cmpn = 0; cmpn < NCMPS; cmpn++) {
+ Node* cmp = cmps[cmpn];
+ if (cmp == NULL) continue;
+ for (DUIterator_Fast jmax, j = cmp->fast_outs(jmax); j < jmax; j++) {
+ Node* bol = cmp->fast_out(j);
+ if (!bol->is_Bool()) continue;
+ BoolTest::mask btest = bol->as_Bool()->_test._test;
+ if (btest == BoolTest::eq || btest == BoolTest::ne) continue;
+ if (cmp->in(1) == ykey) btest = BoolTest(btest).commute();
+ if (bol->outcnt() > (best_bol == NULL ? 0 : best_bol->outcnt())) {
+ best_bol = bol->as_Bool();
+ best_btest = btest;
+ }
+ }
+ }
+
+ Node* answer_if_true = NULL;
+ Node* answer_if_false = NULL;
+ switch (best_btest) {
+ default:
+ if (cmpxy == NULL)
+ cmpxy = ideal_cmpxy;
+ best_bol = _gvn.transform( new(C, 2) BoolNode(cmpxy, BoolTest::lt) );
+ // and fall through:
+ case BoolTest::lt: // x < y
+ case BoolTest::le: // x <= y
+ answer_if_true = (want_max ? yvalue : xvalue);
+ answer_if_false = (want_max ? xvalue : yvalue);
+ break;
+ case BoolTest::gt: // x > y
+ case BoolTest::ge: // x >= y
+ answer_if_true = (want_max ? xvalue : yvalue);
+ answer_if_false = (want_max ? yvalue : xvalue);
+ break;
+ }
+
+ jint hi, lo;
+ if (want_max) {
+ // We can sharpen the minimum.
+ hi = MAX2(txvalue->_hi, tyvalue->_hi);
+ lo = MAX2(txvalue->_lo, tyvalue->_lo);
+ } else {
+ // We can sharpen the maximum.
+ hi = MIN2(txvalue->_hi, tyvalue->_hi);
+ lo = MIN2(txvalue->_lo, tyvalue->_lo);
+ }
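+ // For example, the max of values typed [0,10] and [5,20] can be typed
+ // [5,20], while the min of the same pair can be typed [0,10].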
+
+ // Use a flow-free graph structure, to avoid creating excess control edges
+ // which could hinder other optimizations.
+ // Since Math.min/max is often used with arraycopy, we want
+ // tightly_coupled_allocation to be able to see beyond min/max expressions.
+ Node* cmov = CMoveNode::make(C, NULL, best_bol,
+ answer_if_false, answer_if_true,
+ TypeInt::make(lo, hi, widen));
+
+ return _gvn.transform(cmov);
+
+ /*
+ // This is not as desirable as it may seem, since Min and Max
+ // nodes do not have a full set of optimizations.
+ // And they would interfere, anyway, with 'if' optimizations
+ // and with CMoveI canonical forms.
+ switch (id) {
+ case vmIntrinsics::_min:
+ result_val = _gvn.transform(new (C, 3) MinINode(x,y)); break;
+ case vmIntrinsics::_max:
+ result_val = _gvn.transform(new (C, 3) MaxINode(x,y)); break;
+ default:
+ ShouldNotReachHere();
+ }
+ */
+}
+
+inline int
+LibraryCallKit::classify_unsafe_addr(Node* &base, Node* &offset) {
+ const TypePtr* base_type = TypePtr::NULL_PTR;
+ if (base != NULL) base_type = _gvn.type(base)->isa_ptr();
+ if (base_type == NULL) {
+ // Unknown type.
+ return Type::AnyPtr;
+ } else if (base_type == TypePtr::NULL_PTR) {
+ // Since this is a NULL+long form, we have to switch to a rawptr.
+ base = _gvn.transform( new (C, 2) CastX2PNode(offset) );
+ offset = MakeConX(0);
+ return Type::RawPtr;
+ } else if (base_type->base() == Type::RawPtr) {
+ return Type::RawPtr;
+ } else if (base_type->isa_oopptr()) {
+ // Base is never null => always a heap address.
+ if (base_type->ptr() == TypePtr::NotNull) {
+ return Type::OopPtr;
+ }
+ // Offset is small => always a heap address.
+ const TypeX* offset_type = _gvn.type(offset)->isa_intptr_t();
+ if (offset_type != NULL &&
+ base_type->offset() == 0 && // (should always be?)
+ offset_type->_lo >= 0 &&
+ !MacroAssembler::needs_explicit_null_check(offset_type->_hi)) {
+ return Type::OopPtr;
+ }
+ // Otherwise, it might either be oop+off or NULL+addr.
+ return Type::AnyPtr;
+ } else {
+ // No information:
+ return Type::AnyPtr;
+ }
+}
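+// For example, a constant-null base (including the raw-address Unsafe
+// variants, which pass base == NULL here) takes the NULL_PTR branch above
+// and yields a raw pointer rooted at CastX2P(offset), while a provably
+// non-null oop base with a small non-negative offset classifies as
+// Type::OopPtr and keeps its oop base.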
+
+inline Node* LibraryCallKit::make_unsafe_address(Node* base, Node* offset) {
+ int kind = classify_unsafe_addr(base, offset);
+ if (kind == Type::RawPtr) {
+ return basic_plus_adr(top(), base, offset);
+ } else {
+ return basic_plus_adr(base, offset);
+ }
+}
+
+//----------------------------inline_reverseBytes_int/long-------------------
+// inline Int.reverseBytes(int)
+// inline Long.reverseBytes(long)
+bool LibraryCallKit::inline_reverseBytes(vmIntrinsics::ID id) {
+ assert(id == vmIntrinsics::_reverseBytes_i || id == vmIntrinsics::_reverseBytes_l, "not reverse Bytes");
+ if (id == vmIntrinsics::_reverseBytes_i && !Matcher::has_match_rule(Op_ReverseBytesI)) return false;
+ if (id == vmIntrinsics::_reverseBytes_l && !Matcher::has_match_rule(Op_ReverseBytesL)) return false;
+ _sp += arg_size(); // restore stack pointer
+ switch (id) {
+ case vmIntrinsics::_reverseBytes_i:
+ push(_gvn.transform(new (C, 2) ReverseBytesINode(0, pop())));
+ break;
+ case vmIntrinsics::_reverseBytes_l:
+ push_pair(_gvn.transform(new (C, 2) ReverseBytesLNode(0, pop_pair())));
+ break;
+ default:
+ ;
+ }
+ return true;
+}
+
+//----------------------------inline_unsafe_access----------------------------
+
+const static BasicType T_ADDRESS_HOLDER = T_LONG;
+
+// Interpret Unsafe.fieldOffset cookies correctly:
+extern jlong Unsafe_field_offset_to_byte_offset(jlong field_offset);
+
+bool LibraryCallKit::inline_unsafe_access(bool is_native_ptr, bool is_store, BasicType type, bool is_volatile) {
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ if (!is_store) {
+ // Object getObject(Object base, int/long offset), etc.
+ BasicType rtype = sig->return_type()->basic_type();
+ if (rtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::getAddress_name())
+ rtype = T_ADDRESS; // it is really a C void*
+ assert(rtype == type, "getter must return the expected value");
+ if (!is_native_ptr) {
+ assert(sig->count() == 2, "oop getter has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "getter base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "getter offset is correct");
+ } else {
+ assert(sig->count() == 1, "native getter has 1 argument");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "getter base is long");
+ }
+ } else {
+ // void putObject(Object base, int/long offset, Object x), etc.
+ assert(sig->return_type()->basic_type() == T_VOID, "putter must not return a value");
+ if (!is_native_ptr) {
+ assert(sig->count() == 3, "oop putter has 3 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "putter base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "putter offset is correct");
+ } else {
+ assert(sig->count() == 2, "native putter has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "putter base is long");
+ }
+ BasicType vtype = sig->type_at(sig->count()-1)->basic_type();
+ if (vtype == T_ADDRESS_HOLDER && callee()->name() == ciSymbol::putAddress_name())
+ vtype = T_ADDRESS; // it is really a C void*
+ assert(vtype == type, "putter must accept the expected value");
+ }
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ int type_words = type2size[ (type == T_ADDRESS) ? T_LONG : type ];
+
+ // Argument words: "this" plus (oop/offset) or (lo/hi) args plus maybe 1 or 2 value words
+ int nargs = 1 + (is_native_ptr ? 2 : 3) + (is_store ? type_words : 0);
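+ // e.g., putLong(Object, long, long) comes to 1 + 3 + 2 == 6 argument words,
+ // while the raw-address getLong(long) comes to 1 + 2 == 3.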
+
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+
+ Node* val;
+ debug_only(val = (Node*)(uintptr_t)-1);
+
+
+ if (is_store) {
+ // Get the value being stored. (Pop it first; it was pushed last.)
+ switch (type) {
+ case T_DOUBLE:
+ case T_LONG:
+ case T_ADDRESS:
+ val = pop_pair();
+ break;
+ default:
+ val = pop();
+ }
+ }
+
+ // Build address expression. See the code in inline_unsafe_prefetch.
+ Node *adr;
+ Node *heap_base_oop = top();
+ if (!is_native_ptr) {
+ // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
+ Node* offset = pop_pair();
+ // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+ Node* base = pop();
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half!
+ offset = ConvL2X(offset);
+ adr = make_unsafe_address(base, offset);
+ heap_base_oop = base;
+ } else {
+ Node* ptr = pop_pair();
+ // Adjust Java long to machine word:
+ ptr = ConvL2X(ptr);
+ adr = make_unsafe_address(NULL, ptr);
+ }
+
+ // Pop receiver last: it was pushed first.
+ Node *receiver = pop();
+
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+
+ // First guess at the value type.
+ const Type *value_type = Type::get_const_basic_type(type);
+
+ // Try to categorize the address. If it comes up as TypeJavaPtr::BOTTOM,
+ // there was not enough information to nail it down.
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+ assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
+
+ // We will need memory barriers unless we can determine a unique
+ // alias category for this reference. (Note: If for some reason
+ // the barriers get omitted and the unsafe reference begins to "pollute"
+ // the alias analysis of the rest of the graph, either Compile::can_alias
+ // or Compile::must_alias will throw a diagnostic assert.)
+ bool need_mem_bar = (alias_type->adr_type() == TypeOopPtr::BOTTOM);
+
+ if (!is_store && type == T_OBJECT) {
+ // Attempt to infer a sharper value type from the offset and base type.
+ ciKlass* sharpened_klass = NULL;
+
+ // See if it is an instance field, with an object type.
+ if (alias_type->field() != NULL) {
+ assert(!is_native_ptr, "native pointer op cannot use a java address");
+ if (alias_type->field()->type()->is_klass()) {
+ sharpened_klass = alias_type->field()->type()->as_klass();
+ }
+ }
+
+ // See if it is a narrow oop array.
+ if (adr_type->isa_aryptr()) {
+ if (adr_type->offset() >= objArrayOopDesc::header_size() * wordSize) {
+ const TypeOopPtr *elem_type = adr_type->is_aryptr()->elem()->isa_oopptr();
+ if (elem_type != NULL) {
+ sharpened_klass = elem_type->klass();
+ }
+ }
+ }
+
+ if (sharpened_klass != NULL) {
+ const TypeOopPtr* tjp = TypeOopPtr::make_from_klass(sharpened_klass);
+
+ // Sharpen the value type.
+ value_type = tjp;
+
+#ifndef PRODUCT
+ if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
+ tty->print(" from base type: "); adr_type->dump();
+ tty->print(" sharpened value: "); value_type->dump();
+ }
+#endif
+ }
+ }
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+ // Heap pointers get a null-check from the interpreter,
+ // as a courtesy. However, this is not guaranteed by Unsafe,
+ // and it is not possible to fully distinguish unintended nulls
+ // from intended ones in this API.
+
+ if (is_volatile) {
+ // We need to emit leading and trailing CPU membars (see below) in
+ // addition to memory membars when is_volatile. This is a little
+ // too strong, but avoids the need to insert per-alias-type
+ // volatile membars (for stores; compare Parse::do_put_xxx), which
+ // we cannot do effectively here because we probably only have a
+ // rough approximation of type.
+ need_mem_bar = true;
+ // For Stores, place a memory ordering barrier now.
+ if (is_store)
+ insert_mem_bar(Op_MemBarRelease);
+ }
+
+ // Memory barrier to prevent normal and 'unsafe' accesses from
+ // bypassing each other. Happens after null checks, so the
+ // exception paths do not take memory state from the memory barrier,
+ // so there is no problem making a strong assert about mixing users
+ // of safe & unsafe memory. Otherwise fails in a CTW of rt.jar
+ // around 5701, class sun/reflect/UnsafeBooleanFieldAccessorImpl.
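+ // Taken together with the trailing barriers emitted below, a volatile
+ // store through this path is bracketed as
+ //   MemBarRelease, MemBarCPUOrder, store, MemBarVolatile, MemBarCPUOrder
+ // and a volatile load as
+ //   MemBarCPUOrder, load, MemBarAcquire, MemBarCPUOrder.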
+ if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
+
+ if (!is_store) {
+ Node* p = make_load(control(), adr, value_type, type, adr_type, is_volatile);
+ // load value and push onto stack
+ switch (type) {
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ case T_FLOAT:
+ case T_OBJECT:
+ push( p );
+ break;
+ case T_ADDRESS:
+ // Cast to an int type.
+ p = _gvn.transform( new (C, 2) CastP2XNode(NULL,p) );
+ p = ConvX2L(p);
+ push_pair(p);
+ break;
+ case T_DOUBLE:
+ case T_LONG:
+ push_pair( p );
+ break;
+ default: ShouldNotReachHere();
+ }
+ } else {
+ // place effect of store into memory
+ switch (type) {
+ case T_DOUBLE:
+ val = dstore_rounding(val);
+ break;
+ case T_ADDRESS:
+ // Repackage the long as a pointer.
+ val = ConvL2X(val);
+ val = _gvn.transform( new (C, 2) CastX2PNode(val) );
+ break;
+ }
+
+ if (type != T_OBJECT ) {
+ (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+ } else {
+ // Possibly an oop being stored to Java heap or native memory
+ if (!TypePtr::NULL_PTR->higher_equal(_gvn.type(heap_base_oop))) {
+ // oop to Java heap.
+ (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
+ } else {
+
+ // We can't tell at compile time if we are storing in the Java heap or outside
+ // of it. So we need to emit code to conditionally do the proper type of
+ // store.
+
+ IdealKit kit(gvn(), control(), merged_memory());
+ kit.declares_done();
+ // QQQ who knows what probability is here??
+ kit.if_then(heap_base_oop, BoolTest::ne, null(), PROB_UNLIKELY(0.999)); {
+ (void) store_oop_to_unknown(control(), heap_base_oop, adr, adr_type, val, val->bottom_type(), type);
+ } kit.else_(); {
+ (void) store_to_memory(control(), adr, val, type, adr_type, is_volatile);
+ } kit.end_if();
+ }
+ }
+ }
+
+ if (is_volatile) {
+ if (!is_store)
+ insert_mem_bar(Op_MemBarAcquire);
+ else
+ insert_mem_bar(Op_MemBarVolatile);
+ }
+
+ if (need_mem_bar) insert_mem_bar(Op_MemBarCPUOrder);
+
+ return true;
+}
+
+//----------------------------inline_unsafe_prefetch----------------------------
+
+bool LibraryCallKit::inline_unsafe_prefetch(bool is_native_ptr, bool is_store, bool is_static) {
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ // Object getObject(Object base, int/long offset), etc.
+ BasicType rtype = sig->return_type()->basic_type();
+ if (!is_native_ptr) {
+ assert(sig->count() == 2, "oop prefetch has 2 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "prefetch base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "prefetch offset is correct");
+ } else {
+ assert(sig->count() == 1, "native prefetch has 1 argument");
+ assert(sig->type_at(0)->basic_type() == T_LONG, "prefetch base is long");
+ }
+#endif // ASSERT
+ }
+#endif // !PRODUCT
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" if not static, plus (oop/offset) or (lo/hi) args
+ int nargs = (is_static ? 0 : 1) + (is_native_ptr ? 2 : 3);
+
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+
+ // Build address expression. See the code in inline_unsafe_access.
+ Node *adr;
+ if (!is_native_ptr) {
+ // The offset is a value produced by Unsafe.staticFieldOffset or Unsafe.objectFieldOffset
+ Node* offset = pop_pair();
+ // The base is either a Java object or a value produced by Unsafe.staticFieldBase
+ Node* base = pop();
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half!
+ offset = ConvL2X(offset);
+ adr = make_unsafe_address(base, offset);
+ } else {
+ Node* ptr = pop_pair();
+ // Adjust Java long to machine word:
+ ptr = ConvL2X(ptr);
+ adr = make_unsafe_address(NULL, ptr);
+ }
+
+ if (is_static) {
+ assert(saved_sp == _sp, "must have correct argument count");
+ } else {
+ // Pop receiver last: it was pushed first.
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check on self without removing any arguments. The argument
+ // null check technically happens in the wrong place, which can lead to
+ // invalid stack traces when the primitive is inlined into a method
+ // which handles NullPointerExceptions.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+ }
+
+ // Generate the read or write prefetch
+ Node *prefetch;
+ if (is_store) {
+ prefetch = new (C, 3) PrefetchWriteNode(i_o(), adr);
+ } else {
+ prefetch = new (C, 3) PrefetchReadNode(i_o(), adr);
+ }
+ prefetch->init_req(0, control());
+ set_i_o(_gvn.transform(prefetch));
+
+ return true;
+}
+
+//----------------------------inline_unsafe_CAS----------------------------
+
+bool LibraryCallKit::inline_unsafe_CAS(BasicType type) {
+ // This basic scheme here is the same as inline_unsafe_access, but
+ // differs in enough details that combining them would make the code
+ // overly confusing. (This is a true fact! I originally combined
+ // them, but even I was confused by it!) As much code/comments as
+ // possible are retained from inline_unsafe_access though to make
+ // the correspondences clearer. - dl
+
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ BasicType rtype = sig->return_type()->basic_type();
+ assert(rtype == T_BOOLEAN, "CAS must return boolean");
+ assert(sig->count() == 4, "CAS has 4 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "CAS base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "CAS offset is long");
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ // number of stack slots per value argument (1 or 2)
+ int type_words = type2size[type];
+
+ // Cannot inline wide CAS on machines that don't support it natively
+ if (type2aelembytes[type] > BytesPerInt && !VM_Version::supports_cx8())
+ return false;
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" plus oop plus offset plus oldvalue plus newvalue;
+ int nargs = 1 + 1 + 2 + type_words + type_words;
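+ // e.g., compareAndSwapLong(Object, long, long, long) comes to
+ // 1 + 1 + 2 + 2 + 2 == 8 argument words.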
+
+ // pop arguments: newval, oldval, offset, base, and receiver
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+ Node* newval = (type_words == 1) ? pop() : pop_pair();
+ Node* oldval = (type_words == 1) ? pop() : pop_pair();
+ Node *offset = pop_pair();
+ Node *base = pop();
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check receiver.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+
+ // Build field offset expression.
+ // We currently rely on the cookies produced by Unsafe.xxxFieldOffset
+ // to be plain byte offsets, which are also the same as those accepted
+ // by oopDesc::field_base.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half of long offsets
+ offset = ConvL2X(offset);
+ Node* adr = make_unsafe_address(base, offset);
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+
+ // (Unlike inline_unsafe_access, there seems to be no point in trying
+ // to refine types. Just use the coarse types here.)
+ const Type *value_type = Type::get_const_basic_type(type);
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+ assert(alias_type->index() != Compile::AliasIdxBot, "no bare pointers here");
+ int alias_idx = C->get_alias_index(adr_type);
+
+ // Memory-model-wise, a CAS acts like a little synchronized block,
+ // so it needs barriers on each side. These don't translate into
+ // actual barriers on most machines, but we still need the rest of the
+ // compiler to respect ordering.
+
+ insert_mem_bar(Op_MemBarRelease);
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // 4984716: MemBars must be inserted before this
+ // memory node in order to avoid a false
+ // dependency which will confuse the scheduler.
+ Node *mem = memory(alias_idx);
+
+ // For now, we handle only those cases that actually exist: ints,
+ // longs, and Object. Adding others should be straightforward.
+ Node* cas;
+ switch(type) {
+ case T_INT:
+ cas = _gvn.transform(new (C, 5) CompareAndSwapINode(control(), mem, adr, newval, oldval));
+ break;
+ case T_LONG:
+ cas = _gvn.transform(new (C, 5) CompareAndSwapLNode(control(), mem, adr, newval, oldval));
+ break;
+ case T_OBJECT:
+ // reference stores need a store barrier.
+ // (They don't if CAS fails, but it isn't worth checking.)
+ pre_barrier(control(), base, adr, alias_idx, newval, value_type, T_OBJECT);
+ cas = _gvn.transform(new (C, 5) CompareAndSwapPNode(control(), mem, adr, newval, oldval));
+ post_barrier(control(), cas, base, adr, alias_idx, newval, T_OBJECT, true);
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+
+ // SCMemProjNodes represent the memory state of CAS. Their main
+ // role is to prevent CAS nodes from being optimized away when their
+ // results aren't used.
+ Node* proj = _gvn.transform( new (C, 1) SCMemProjNode(cas));
+ set_memory(proj, alias_idx);
+
+ // Add the trailing membar surrounding the access
+ insert_mem_bar(Op_MemBarCPUOrder);
+ insert_mem_bar(Op_MemBarAcquire);
+
+ push(cas);
+ return true;
+}
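+
+// Aside: a minimal standalone C++11 sketch (an analogy, not how the VM emits
+// code) of the ordering the barrier sandwich above gives a CAS: earlier writes
+// may not sink below it, later reads may not hoist above it. The helper name
+// cas_int is made up for illustration.
+#if 0 // illustration only, excluded from the build
+#include <atomic>
+
+static bool cas_int(std::atomic<int>& cell, int expected, int update) {
+  // acq_rel on success pairs a release "before" with an acquire "after",
+  // mirroring the MemBarRelease ... MemBarAcquire pairing above.
+  return cell.compare_exchange_strong(expected, update,
+                                      std::memory_order_acq_rel,
+                                      std::memory_order_acquire);
+}
+#endif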
+
+bool LibraryCallKit::inline_unsafe_ordered_store(BasicType type) {
+ // This is another variant of inline_unsafe_access, differing in
+ // that it always issues a store-store ("release") barrier and ensures
+ // store-atomicity (which only matters for "long").
+
+ if (callee()->is_static()) return false; // caller must have the capability!
+
+#ifndef PRODUCT
+ {
+ ResourceMark rm;
+ // Check the signatures.
+ ciSignature* sig = signature();
+#ifdef ASSERT
+ BasicType rtype = sig->return_type()->basic_type();
+ assert(rtype == T_VOID, "must return void");
+ assert(sig->count() == 3, "has 3 arguments");
+ assert(sig->type_at(0)->basic_type() == T_OBJECT, "base is object");
+ assert(sig->type_at(1)->basic_type() == T_LONG, "offset is long");
+#endif // ASSERT
+ }
+#endif //PRODUCT
+
+ // number of stack slots per value argument (1 or 2)
+ int type_words = type2size[type];
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ // Argument words: "this" plus oop plus offset plus value;
+ int nargs = 1 + 1 + 2 + type_words;
+
+ // pop arguments: val, offset, base, and receiver
+ debug_only(int saved_sp = _sp);
+ _sp += nargs;
+ Node* val = (type_words == 1) ? pop() : pop_pair();
+ Node *offset = pop_pair();
+ Node *base = pop();
+ Node *receiver = pop();
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // Null check receiver.
+ _sp += nargs;
+ do_null_check(receiver, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) {
+ return true;
+ }
+
+ // Build field offset expression.
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11, "fieldOffset must be byte-scaled");
+ // 32-bit machines ignore the high half of long offsets
+ offset = ConvL2X(offset);
+ Node* adr = make_unsafe_address(base, offset);
+ const TypePtr *adr_type = _gvn.type(adr)->isa_ptr();
+ const Type *value_type = Type::get_const_basic_type(type);
+ Compile::AliasType* alias_type = C->alias_type(adr_type);
+
+ insert_mem_bar(Op_MemBarRelease);
+ insert_mem_bar(Op_MemBarCPUOrder);
+ // Ensure that the store is atomic for longs:
+ bool require_atomic_access = true;
+ Node* store;
+ if (type == T_OBJECT) // reference stores need a store barrier.
+ store = store_oop_to_unknown(control(), base, adr, adr_type, val, value_type, type);
+ else {
+ store = store_to_memory(control(), adr, val, type, adr_type, require_atomic_access);
+ }
+ insert_mem_bar(Op_MemBarCPUOrder);
+ return true;
+}
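+
+// Aside: a standalone C++11 sketch (not the VM's mechanism) of the contract the
+// caller gets: a release store that is also atomic for 64-bit values, even on
+// 32-bit targets. The helper name ordered_put_long is made up for illustration.
+#if 0 // illustration only
+#include <atomic>
+
+static void ordered_put_long(std::atomic<long long>& cell, long long v) {
+  // Earlier stores cannot be reordered after this store, and the 64-bit value
+  // is written as a single unit.
+  cell.store(v, std::memory_order_release);
+}
+#endif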
+
+bool LibraryCallKit::inline_unsafe_allocate() {
+ if (callee()->is_static()) return false; // caller must have the capability!
+ int nargs = 1 + 1;
+ assert(signature()->size() == nargs-1, "alloc has 1 argument");
+ null_check_receiver(callee()); // check then ignore argument(0)
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* cls = do_null_check(argument(1), T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) return true;
+
+ Node* kls = load_klass_from_mirror(cls, false, nargs, NULL, 0);
+ _sp += nargs; // set original stack for use by uncommon_trap
+ kls = do_null_check(kls, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) return true; // argument was like int.class
+
+ // Note: The argument might still be an illegal value like
+ // Serializable.class or Object[].class. The runtime will handle it.
+ // But we must make an explicit check for initialization.
+ Node* insp = basic_plus_adr(kls, instanceKlass::init_state_offset_in_bytes() + sizeof(oopDesc));
+ Node* inst = make_load(NULL, insp, TypeInt::INT, T_INT);
+ Node* bits = intcon(instanceKlass::fully_initialized);
+ Node* test = _gvn.transform( new (C, 3) SubINode(inst, bits) );
+ // The 'test' is non-zero if we need to take a slow path.
+
+ Node* obj = new_instance(kls, test);
+ push(obj);
+
+ return true;
+}
+
+//------------------------inline_native_time_funcs--------------
+// inline code for System.currentTimeMillis() and System.nanoTime()
+// these have the same type and signature
+bool LibraryCallKit::inline_native_time_funcs(bool isNano) {
+ address funcAddr = isNano ? CAST_FROM_FN_PTR(address, os::javaTimeNanos) :
+ CAST_FROM_FN_PTR(address, os::javaTimeMillis);
+ const char * funcName = isNano ? "nanoTime" : "currentTimeMillis";
+ const TypeFunc *tf = OptoRuntime::current_time_millis_Type();
+ const TypePtr* no_memory_effects = NULL;
+ Node* time = make_runtime_call(RC_LEAF, tf, funcAddr, funcName, no_memory_effects);
+ Node* value = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms+0));
+#ifdef ASSERT
+ Node* value_top = _gvn.transform(new (C, 1) ProjNode(time, TypeFunc::Parms + 1));
+ assert(value_top == top(), "second value must be top");
+#endif
+ push_pair(value);
+ return true;
+}
+
+//------------------------inline_native_currentThread------------------
+bool LibraryCallKit::inline_native_currentThread() {
+ Node* junk = NULL;
+ push(generate_current_thread(junk));
+ return true;
+}
+
+//------------------------inline_native_isInterrupted------------------
+bool LibraryCallKit::inline_native_isInterrupted() {
+ const int nargs = 1+1; // receiver + boolean
+ assert(nargs == arg_size(), "sanity");
+ // Add a fast path to t.isInterrupted(clear_int):
+ // (t == Thread.current() && (!TLS._osthread._interrupted || !clear_int))
+ // ? TLS._osthread._interrupted : /*slow path:*/ t.isInterrupted(clear_int)
+ // So, in the common case that the interrupt bit is false,
+ // we avoid making a call into the VM. Even if the interrupt bit
+ // is true, if the clear_int argument is false, we avoid the VM call.
+ // However, if the receiver is not currentThread, we must call the VM,
+ // because there must be some locking done around the operation.
+
+ // We only go to the fast case code if we pass two guards.
+ // Paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+ RegionNode* result_rgn = new (C, 4) RegionNode(1+3); // fast1, fast2, slow
+ PhiNode* result_val = new (C, 4) PhiNode(result_rgn, TypeInt::BOOL);
+ enum { no_int_result_path = 1,
+ no_clear_result_path = 2,
+ slow_result_path = 3
+ };
+
+ // (a) Receiving thread must be the current thread.
+ Node* rec_thr = argument(0);
+ Node* tls_ptr = NULL;
+ Node* cur_thr = generate_current_thread(tls_ptr);
+ Node* cmp_thr = _gvn.transform( new (C, 3) CmpPNode(cur_thr, rec_thr) );
+ Node* bol_thr = _gvn.transform( new (C, 2) BoolNode(cmp_thr, BoolTest::ne) );
+
+ bool known_current_thread = (_gvn.type(bol_thr) == TypeInt::ZERO);
+ if (!known_current_thread)
+ generate_slow_guard(bol_thr, slow_region);
+
+ // (b) Interrupt bit on TLS must be false.
+ Node* p = basic_plus_adr(top()/*!oop*/, tls_ptr, in_bytes(JavaThread::osthread_offset()));
+ Node* osthread = make_load(NULL, p, TypeRawPtr::NOTNULL, T_ADDRESS);
+ p = basic_plus_adr(top()/*!oop*/, osthread, in_bytes(OSThread::interrupted_offset()));
+ Node* int_bit = make_load(NULL, p, TypeInt::BOOL, T_INT);
+ Node* cmp_bit = _gvn.transform( new (C, 3) CmpINode(int_bit, intcon(0)) );
+ Node* bol_bit = _gvn.transform( new (C, 2) BoolNode(cmp_bit, BoolTest::ne) );
+
+ IfNode* iff_bit = create_and_map_if(control(), bol_bit, PROB_UNLIKELY_MAG(3), COUNT_UNKNOWN);
+
+ // First fast path: if (!TLS._interrupted) return false;
+ Node* false_bit = _gvn.transform( new (C, 1) IfFalseNode(iff_bit) );
+ result_rgn->init_req(no_int_result_path, false_bit);
+ result_val->init_req(no_int_result_path, intcon(0));
+
+ // drop through to next case
+ set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_bit)) );
+
+ // (c) Or, if interrupt bit is set and clear_int is false, use 2nd fast path.
+ Node* clr_arg = argument(1);
+ Node* cmp_arg = _gvn.transform( new (C, 3) CmpINode(clr_arg, intcon(0)) );
+ Node* bol_arg = _gvn.transform( new (C, 2) BoolNode(cmp_arg, BoolTest::ne) );
+ IfNode* iff_arg = create_and_map_if(control(), bol_arg, PROB_FAIR, COUNT_UNKNOWN);
+
+ // Second fast path: ... else if (!clear_int) return true;
+ Node* false_arg = _gvn.transform( new (C, 1) IfFalseNode(iff_arg) );
+ result_rgn->init_req(no_clear_result_path, false_arg);
+ result_val->init_req(no_clear_result_path, intcon(1));
+
+ // drop through to next case
+ set_control( _gvn.transform(new (C, 1) IfTrueNode(iff_arg)) );
+
+ // (d) Otherwise, go to the slow path.
+ slow_region->add_req(control());
+ set_control( _gvn.transform(slow_region) );
+
+ if (stopped()) {
+ // There is no slow path.
+ result_rgn->init_req(slow_result_path, top());
+ result_val->init_req(slow_result_path, top());
+ } else {
+ // non-virtual because it is a private non-static
+ CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_isInterrupted);
+
+ Node* slow_val = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+
+ // If we know that the result of the slow call will be true, tell the optimizer!
+ if (known_current_thread) slow_val = intcon(1);
+
+ Node* fast_io = slow_call->in(TypeFunc::I_O);
+ Node* fast_mem = slow_call->in(TypeFunc::Memory);
+ // These two phis are pre-filled with copies of the fast IO and Memory
+ Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
+ Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+
+ result_rgn->init_req(slow_result_path, control());
+ io_phi ->init_req(slow_result_path, i_o());
+ mem_phi ->init_req(slow_result_path, reset_memory());
+ result_val->init_req(slow_result_path, slow_val);
+
+ set_all_memory( _gvn.transform(mem_phi) );
+ set_i_o( _gvn.transform(io_phi) );
+ }
+
+ push_result(result_rgn, result_val);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
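+
+// Aside: the fast/slow split above, written out as plain control flow. A sketch
+// only; current_thread(), tls_interrupted_bit() and slow_is_interrupted() are
+// hypothetical helpers standing in for the TLS load and the locked VM call.
+#if 0 // illustration only
+static bool is_interrupted(Thread* t, bool clear_int) {
+  if (t == current_thread()) {
+    if (!tls_interrupted_bit()) return false;  // first fast path
+    if (!clear_int)             return true;   // second fast path
+  }
+  return slow_is_interrupted(t, clear_int);    // slow path: call into the VM
+}
+#endif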
+
+//---------------------------load_mirror_from_klass----------------------------
+// Given a klass oop, load its java mirror (a java.lang.Class oop).
+Node* LibraryCallKit::load_mirror_from_klass(Node* klass) {
+ Node* p = basic_plus_adr(klass, Klass::java_mirror_offset_in_bytes() + sizeof(oopDesc));
+ return make_load(NULL, p, TypeInstPtr::MIRROR, T_OBJECT);
+}
+
+//-----------------------load_klass_from_mirror_common-------------------------
+// Given a java mirror (a java.lang.Class oop), load its corresponding klass oop.
+// Test the klass oop for null (signifying a primitive Class like Integer.TYPE),
+// and branch to the given path on the region.
+// If never_see_null, take an uncommon trap on null, so we can optimistically
+// compile for the non-null case.
+// If the region is NULL, force never_see_null = true.
+Node* LibraryCallKit::load_klass_from_mirror_common(Node* mirror,
+ bool never_see_null,
+ int nargs,
+ RegionNode* region,
+ int null_path,
+ int offset) {
+ if (region == NULL) never_see_null = true;
+ Node* p = basic_plus_adr(mirror, offset);
+ const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
+ Node* kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, kls_type));
+ _sp += nargs; // any deopt will start just before call to enclosing method
+ Node* null_ctl = top();
+ kls = null_check_oop(kls, &null_ctl, never_see_null);
+ if (region != NULL) {
+ // Set region->in(null_path) if the mirror is a primitive (e.g., int.class).
+ region->init_req(null_path, null_ctl);
+ } else {
+ assert(null_ctl == top(), "no loose ends");
+ }
+ _sp -= nargs;
+ return kls;
+}
+
+//--------------------(inline_native_Class_query helpers)---------------------
+// Use this for JVM_ACC_INTERFACE, JVM_ACC_IS_CLONEABLE, JVM_ACC_HAS_FINALIZER.
+// Fall through if (mods & mask) == bits, take the guard otherwise.
+Node* LibraryCallKit::generate_access_flags_guard(Node* kls, int modifier_mask, int modifier_bits, RegionNode* region) {
+ // Branch around if the given klass has the given modifier bit set.
+ // Like generate_guard, adds a new path onto the region.
+ Node* modp = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* mods = make_load(NULL, modp, TypeInt::INT, T_INT);
+ Node* mask = intcon(modifier_mask);
+ Node* bits = intcon(modifier_bits);
+ Node* mbit = _gvn.transform( new (C, 3) AndINode(mods, mask) );
+ Node* cmp = _gvn.transform( new (C, 3) CmpINode(mbit, bits) );
+ Node* bol = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ne) );
+ return generate_fair_guard(bol, region);
+}
+Node* LibraryCallKit::generate_interface_guard(Node* kls, RegionNode* region) {
+ return generate_access_flags_guard(kls, JVM_ACC_INTERFACE, 0, region);
+}
+
+//-------------------------inline_native_Class_query-------------------
+bool LibraryCallKit::inline_native_Class_query(vmIntrinsics::ID id) {
+ int nargs = 1+0; // just the Class mirror, in most cases
+ const Type* return_type = TypeInt::BOOL;
+ Node* prim_return_value = top(); // what happens if it's a primitive class?
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ bool expect_prim = false; // most of these guys expect to work on refs
+
+ enum { _normal_path = 1, _prim_path = 2, PATH_LIMIT };
+
+ switch (id) {
+ case vmIntrinsics::_isInstance:
+ nargs = 1+1; // the Class mirror, plus the object getting queried about
+ // nothing is an instance of a primitive type
+ prim_return_value = intcon(0);
+ break;
+ case vmIntrinsics::_getModifiers:
+ prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
+ assert(is_power_of_2((int)JVM_ACC_WRITTEN_FLAGS+1), "change next line");
+ return_type = TypeInt::make(0, JVM_ACC_WRITTEN_FLAGS, Type::WidenMin);
+ break;
+ case vmIntrinsics::_isInterface:
+ prim_return_value = intcon(0);
+ break;
+ case vmIntrinsics::_isArray:
+ prim_return_value = intcon(0);
+ expect_prim = true; // cf. ObjectStreamClass.getClassSignature
+ break;
+ case vmIntrinsics::_isPrimitive:
+ prim_return_value = intcon(1);
+ expect_prim = true; // obviously
+ break;
+ case vmIntrinsics::_getSuperclass:
+ prim_return_value = null();
+ return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
+ break;
+ case vmIntrinsics::_getComponentType:
+ prim_return_value = null();
+ return_type = TypeInstPtr::MIRROR->cast_to_ptr_type(TypePtr::BotPTR);
+ break;
+ case vmIntrinsics::_getClassAccessFlags:
+ prim_return_value = intcon(JVM_ACC_ABSTRACT | JVM_ACC_FINAL | JVM_ACC_PUBLIC);
+ return_type = TypeInt::INT; // not bool! 6297094
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+
+ Node* mirror = argument(0);
+ Node* obj = (nargs <= 1)? top(): argument(1);
+
+ const TypeInstPtr* mirror_con = _gvn.type(mirror)->isa_instptr();
+ if (mirror_con == NULL) return false; // cannot happen?
+
+#ifndef PRODUCT
+ if (PrintIntrinsics || PrintInlining || PrintOptoInlining) {
+ ciType* k = mirror_con->java_mirror_type();
+ if (k) {
+ tty->print("Inlining %s on constant Class ", vmIntrinsics::name_at(intrinsic_id()));
+ k->print_name();
+ tty->cr();
+ }
+ }
+#endif
+
+ // Null-check the mirror, and the mirror's klass ptr (in case it is a primitive).
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ record_for_igvn(region);
+ PhiNode* phi = new (C, PATH_LIMIT) PhiNode(region, return_type);
+
+ // The mirror will never be null for Reflection.getClassAccessFlags; however,
+ // it may be null for Class.isInstance or Class.getModifiers. Throw an NPE
+ // if it is. See bug 4774291.
+
+ // For Reflection.getClassAccessFlags(), the null check occurs in
+ // the wrong place; see inline_unsafe_access(), above, for a similar
+ // situation.
+ _sp += nargs; // set original stack for use by uncommon_trap
+ mirror = do_null_check(mirror, T_OBJECT);
+ _sp -= nargs;
+ // If mirror or obj is dead, only null-path is taken.
+ if (stopped()) return true;
+
+ if (expect_prim) never_see_null = false; // expect nulls (meaning prims)
+
+ // Now load the mirror's klass metaobject, and null-check it.
+ // Side-effects region with the control path if the klass is null.
+ Node* kls = load_klass_from_mirror(mirror, never_see_null, nargs,
+ region, _prim_path);
+ // If kls is null, we have a primitive mirror.
+ phi->init_req(_prim_path, prim_return_value);
+ if (stopped()) { push_result(region, phi); return true; }
+
+ Node* p; // handy temp
+ Node* null_ctl;
+
+ // Now that we have the non-null klass, we can perform the real query.
+ // For constant classes, the query will constant-fold in LoadNode::Value.
+ Node* query_value = top();
+ switch (id) {
+ case vmIntrinsics::_isInstance:
+ // nothing is an instance of a primitive type
+ query_value = gen_instanceof(obj, kls);
+ break;
+
+ case vmIntrinsics::_getModifiers:
+ p = basic_plus_adr(kls, Klass::modifier_flags_offset_in_bytes() + sizeof(oopDesc));
+ query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+ break;
+
+ case vmIntrinsics::_isInterface:
+ // (To verify this code sequence, check the asserts in JVM_IsInterface.)
+ if (generate_interface_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an interface.
+ phi->add_req(intcon(1));
+ // If we fall through, it's a plain class.
+ query_value = intcon(0);
+ break;
+
+ case vmIntrinsics::_isArray:
+ // (To verify this code sequence, check the asserts in JVM_IsArrayClass.)
+ if (generate_array_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an array.
+ phi->add_req(intcon(1));
+ // If we fall through, it's a plain class.
+ query_value = intcon(0);
+ break;
+
+ case vmIntrinsics::_isPrimitive:
+ query_value = intcon(0); // "normal" path produces false
+ break;
+
+ case vmIntrinsics::_getSuperclass:
+ // The rules here are somewhat unfortunate, but we can still do better
+ // with random logic than with a JNI call.
+ // Interfaces store null or Object as _super, but must report null.
+ // Arrays store an intermediate super as _super, but must report Object.
+ // Other types can report the actual _super.
+ // (To verify this code sequence, check the asserts in JVM_IsInterface.)
+ if (generate_interface_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an interface.
+ phi->add_req(null());
+ if (generate_array_guard(kls, region) != NULL)
+ // A guard was added. If the guard is taken, it was an array.
+ phi->add_req(makecon(TypeInstPtr::make(env()->Object_klass()->java_mirror())));
+ // If we fall through, it's a plain class. Get its _super.
+ p = basic_plus_adr(kls, Klass::super_offset_in_bytes() + sizeof(oopDesc));
+ kls = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeRawPtr::BOTTOM, TypeKlassPtr::OBJECT_OR_NULL));
+ null_ctl = top();
+ kls = null_check_oop(kls, &null_ctl);
+ if (null_ctl != top()) {
+ // If the guard is taken, Object.superClass is null (both klass and mirror).
+ region->add_req(null_ctl);
+ phi ->add_req(null());
+ }
+ if (!stopped()) {
+ query_value = load_mirror_from_klass(kls);
+ }
+ break;
+
+ case vmIntrinsics::_getComponentType:
+ if (generate_array_guard(kls, region) != NULL) {
+ // Be sure to pin the oop load to the guard edge just created:
+ Node* is_array_ctrl = region->in(region->req()-1);
+ Node* cma = basic_plus_adr(kls, in_bytes(arrayKlass::component_mirror_offset()) + sizeof(oopDesc));
+ Node* cmo = make_load(is_array_ctrl, cma, TypeInstPtr::MIRROR, T_OBJECT);
+ phi->add_req(cmo);
+ }
+ query_value = null(); // non-array case is null
+ break;
+
+ case vmIntrinsics::_getClassAccessFlags:
+ p = basic_plus_adr(kls, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ query_value = make_load(NULL, p, TypeInt::INT, T_INT);
+ break;
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ // Fall-through is the normal case of a query to a real class.
+ phi->init_req(1, query_value);
+ region->init_req(1, control());
+
+ push_result(region, phi);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
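+
+// Aside: the _getSuperclass rules above, restated as straight-line code. A
+// sketch only; is_primitive_mirror(), is_interface(), is_array(), real_super()
+// and OBJECT_MIRROR (Object.class) are hypothetical names.
+#if 0 // illustration only
+static jclass superclass_of(jclass c) {
+  if (is_primitive_mirror(c)) return NULL;          // int.class etc. => null
+  if (is_interface(c))        return NULL;          // interfaces report null
+  if (is_array(c))            return OBJECT_MIRROR; // arrays report Object
+  return real_super(c);                             // plain classes: their _super
+}
+#endif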
+
+//--------------------------inline_native_subtype_check------------------------
+// This intrinsic takes the JNI calls out of the heart of
+// UnsafeFieldAccessorImpl.set, which improves Field.set, readObject, etc.
+bool LibraryCallKit::inline_native_subtype_check() {
+ int nargs = 1+1; // the Class mirror, plus the other class getting examined
+
+ // Pull both arguments off the stack.
+ Node* args[2]; // two java.lang.Class mirrors: superc, subc
+ args[0] = argument(0);
+ args[1] = argument(1);
+ Node* klasses[2]; // corresponding Klasses: superk, subk
+ klasses[0] = klasses[1] = top();
+
+ enum {
+ // A full decision tree on {superc is prim, subc is prim}:
+ _prim_0_path = 1, // {P,N} => false
+ // {P,P} & superc!=subc => false
+ _prim_same_path, // {P,P} & superc==subc => true
+ _prim_1_path, // {N,P} => false
+ _ref_subtype_path, // {N,N} & subtype check wins => true
+ _both_ref_path, // {N,N} & subtype check loses => false
+ PATH_LIMIT
+ };
+
+ RegionNode* region = new (C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ Node* phi = new (C, PATH_LIMIT) PhiNode(region, TypeInt::BOOL);
+ record_for_igvn(region);
+
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM; // memory type of loads
+ const TypeKlassPtr* kls_type = TypeKlassPtr::OBJECT_OR_NULL;
+ int class_klass_offset = java_lang_Class::klass_offset_in_bytes();
+
+ // First null-check both mirrors and load each mirror's klass metaobject.
+ int which_arg;
+ for (which_arg = 0; which_arg <= 1; which_arg++) {
+ Node* arg = args[which_arg];
+ _sp += nargs; // set original stack for use by uncommon_trap
+ arg = do_null_check(arg, T_OBJECT);
+ _sp -= nargs;
+ if (stopped()) break;
+ args[which_arg] = _gvn.transform(arg);
+
+ Node* p = basic_plus_adr(arg, class_klass_offset);
+ Node* kls = new (C, 3) LoadKlassNode(0, immutable_memory(), p, adr_type, kls_type);
+ klasses[which_arg] = _gvn.transform(kls);
+ }
+
+ // Having loaded both klasses, test each for null.
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ for (which_arg = 0; which_arg <= 1; which_arg++) {
+ Node* kls = klasses[which_arg];
+ Node* null_ctl = top();
+ _sp += nargs; // set original stack for use by uncommon_trap
+ kls = null_check_oop(kls, &null_ctl, never_see_null);
+ _sp -= nargs;
+ int prim_path = (which_arg == 0 ? _prim_0_path : _prim_1_path);
+ region->init_req(prim_path, null_ctl);
+ if (stopped()) break;
+ klasses[which_arg] = kls;
+ }
+
+ if (!stopped()) {
+ // now we have two reference types, in klasses[0..1]
+ Node* subk = klasses[1]; // the argument to isAssignableFrom
+ Node* superk = klasses[0]; // the receiver
+ region->set_req(_both_ref_path, gen_subtype_check(subk, superk));
+ // now we have a successful reference subtype check
+ region->set_req(_ref_subtype_path, control());
+ }
+
+ // If both operands are primitive (both klasses null), then
+ // we must return true when they are identical primitives.
+ // It is convenient to test this after the first null klass check.
+ set_control(region->in(_prim_0_path)); // go back to first null check
+ if (!stopped()) {
+ // Since superc is primitive, make a guard for the superc==subc case.
+ Node* cmp_eq = _gvn.transform( new (C, 3) CmpPNode(args[0], args[1]) );
+ Node* bol_eq = _gvn.transform( new (C, 2) BoolNode(cmp_eq, BoolTest::eq) );
+ generate_guard(bol_eq, region, PROB_FAIR);
+ if (region->req() == PATH_LIMIT+1) {
+ // A guard was added. If the added guard is taken, superc==subc.
+ region->swap_edges(PATH_LIMIT, _prim_same_path);
+ region->del_req(PATH_LIMIT);
+ }
+ region->set_req(_prim_0_path, control()); // Not equal after all.
+ }
+
+ // these are the only paths that produce 'true':
+ phi->set_req(_prim_same_path, intcon(1));
+ phi->set_req(_ref_subtype_path, intcon(1));
+
+ // pull together the cases:
+ assert(region->req() == PATH_LIMIT, "sane region");
+ for (uint i = 1; i < region->req(); i++) {
+ Node* ctl = region->in(i);
+ if (ctl == NULL || ctl == top()) {
+ region->set_req(i, top());
+ phi ->set_req(i, top());
+ } else if (phi->in(i) == NULL) {
+ phi->set_req(i, intcon(0)); // all other paths produce 'false'
+ }
+ }
+
+ set_control(_gvn.transform(region));
+ push(_gvn.transform(phi));
+
+ return true;
+}
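+
+// Aside: the decision tree above as straight-line code for
+// superc.isAssignableFrom(subc). A sketch only; is_primitive_mirror() and
+// is_subtype_of() are hypothetical helpers.
+#if 0 // illustration only
+static bool is_assignable_from(jclass superc, jclass subc) {
+  if (is_primitive_mirror(superc)) return superc == subc; // {P,P} same => true
+  if (is_primitive_mirror(subc))   return false;          // {N,P}      => false
+  return is_subtype_of(subc, superc);                     // {N,N} subtype check
+}
+#endif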
+
+//---------------------generate_array_guard_common------------------------
+Node* LibraryCallKit::generate_array_guard_common(Node* kls, RegionNode* region,
+ bool obj_array, bool not_array) {
+ // If obj_array/not_array==false/false:
+ // Branch around if the given klass is in fact an array (either obj or prim).
+ // If obj_array/not_array==false/true:
+ // Branch around if the given klass is not an array klass of any kind.
+ // If obj_array/not_array==true/true:
+ // Branch around if the kls is not an oop array (kls is int[], String, etc.)
+ // If obj_array/not_array==true/false:
+ // Branch around if the kls is an oop array (Object[] or subtype)
+ //
+ // Like generate_guard, adds a new path onto the region.
+ jint layout_con = 0;
+ Node* layout_val = get_layout_helper(kls, layout_con);
+ if (layout_val == NULL) {
+ bool query = (obj_array
+ ? Klass::layout_helper_is_objArray(layout_con)
+ : Klass::layout_helper_is_javaArray(layout_con));
+ if (query == not_array) {
+ return NULL; // never a branch
+ } else { // always a branch
+ Node* always_branch = control();
+ if (region != NULL)
+ region->add_req(always_branch);
+ set_control(top());
+ return always_branch;
+ }
+ }
+ // Now test the correct condition.
+ jint nval = (obj_array
+ ? ((jint)Klass::_lh_array_tag_type_value
+ << Klass::_lh_array_tag_shift)
+ : Klass::_lh_neutral_value);
+ Node* cmp = _gvn.transform( new(C, 3) CmpINode(layout_val, intcon(nval)) );
+ BoolTest::mask btest = BoolTest::lt; // correct for testing is_[obj]array
+ // invert the test if we are looking for a non-array
+ if (not_array) btest = BoolTest(btest).negate();
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, btest) );
+ return generate_fair_guard(bol, region);
+}
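+
+// Aside: why a signed "less than" answers both questions above. An array klass
+// has a negative layout helper whose high bits are an array tag; oop arrays
+// sort below type arrays, which sort below the neutral value 0. The constant
+// below assumes the 2-bit tag layout implied by _lh_array_tag_shift; a sketch,
+// not the real accessors.
+#if 0 // illustration only
+#include <stdint.h>
+
+static bool is_any_array(int32_t lh) { return lh < 0; }
+static bool is_obj_array(int32_t lh) { return lh < (int32_t)0xC0000000; }
+#endif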
+
+
+//-----------------------inline_native_newArray--------------------------
+bool LibraryCallKit::inline_native_newArray() {
+ int nargs = 2;
+ Node* mirror = argument(0);
+ Node* count_val = argument(1);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ mirror = do_null_check(mirror, T_OBJECT);
+ _sp -= nargs;
+
+ enum { _normal_path = 1, _slow_path = 2, PATH_LIMIT };
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInstPtr::NOTNULL);
+ PhiNode* result_io = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+
+ bool never_see_null = !too_many_traps(Deoptimization::Reason_null_check);
+ Node* klass_node = load_array_klass_from_mirror(mirror, never_see_null,
+ nargs,
+ result_reg, _slow_path);
+ Node* normal_ctl = control();
+ Node* no_array_ctl = result_reg->in(_slow_path);
+
+ // Generate code for the slow case. We make a call to newArray().
+ set_control(no_array_ctl);
+ if (!stopped()) {
+ // Either the input type is void.class, or else the
+ // array klass has not yet been cached. Either the
+ // ensuing call will throw an exception, or else it
+ // will cache the array klass for next time.
+ PreserveJVMState pjvms(this);
+ CallJavaNode* slow_call = generate_method_call_static(vmIntrinsics::_newArray);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->set_req(_slow_path, control());
+ result_val->set_req(_slow_path, slow_result);
+ result_io ->set_req(_slow_path, i_o());
+ result_mem->set_req(_slow_path, reset_memory());
+ }
+
+ set_control(normal_ctl);
+ if (!stopped()) {
+ // Normal case: The array type has been cached in the java.lang.Class.
+ // The following call works fine even if the array type is polymorphic.
+ // It could be a dynamic mix of int[], boolean[], Object[], etc.
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* obj = new_array(klass_node, count_val);
+ _sp -= nargs;
+ result_reg->init_req(_normal_path, control());
+ result_val->init_req(_normal_path, obj);
+ result_io ->init_req(_normal_path, i_o());
+ result_mem->init_req(_normal_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_i_o( _gvn.transform(result_io) );
+ set_all_memory( _gvn.transform(result_mem) );
+ push_result(result_reg, result_val);
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
+
+//----------------------inline_native_getLength--------------------------
+bool LibraryCallKit::inline_native_getLength() {
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ int nargs = 1;
+ Node* array = argument(0);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ array = do_null_check(array, T_OBJECT);
+ _sp -= nargs;
+
+ // If array is dead, only null-path is taken.
+ if (stopped()) return true;
+
+ // Deoptimize if it is a non-array.
+ Node* non_array = generate_non_array_guard(load_object_klass(array), NULL);
+
+ if (non_array != NULL) {
+ PreserveJVMState pjvms(this);
+ set_control(non_array);
+ _sp += nargs; // push the arguments back on the stack
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_maybe_recompile);
+ }
+
+ // If control is dead, only non-array-path is taken.
+ if (stopped()) return true;
+
+ // This works fine even if the array type is polymorphic.
+ // It could be a dynamic mix of int[], boolean[], Object[], etc.
+ push( load_array_length(array) );
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
+
+//------------------------inline_array_copyOf----------------------------
+bool LibraryCallKit::inline_array_copyOf(bool is_copyOfRange) {
+ if (too_many_traps(Deoptimization::Reason_intrinsic)) return false;
+
+ // Restore the stack and pop off the arguments.
+ int nargs = 3 + (is_copyOfRange? 1: 0);
+ Node* original = argument(0);
+ Node* start = is_copyOfRange? argument(1): intcon(0);
+ Node* end = is_copyOfRange? argument(2): argument(1);
+ Node* array_type_mirror = is_copyOfRange? argument(3): argument(2);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ array_type_mirror = do_null_check(array_type_mirror, T_OBJECT);
+ original = do_null_check(original, T_OBJECT);
+ _sp -= nargs;
+
+ // Check if a null path was taken unconditionally.
+ if (stopped()) return true;
+
+ Node* orig_length = load_array_length(original);
+
+ Node* klass_node = load_klass_from_mirror(array_type_mirror, false, nargs,
+ NULL, 0);
+ _sp += nargs; // set original stack for use by uncommon_trap
+ klass_node = do_null_check(klass_node, T_OBJECT);
+ _sp -= nargs;
+
+ RegionNode* bailout = new (C, 1) RegionNode(1);
+ record_for_igvn(bailout);
+
+ // Despite the generic type of Arrays.copyOf, the mirror might be int, int[], etc.
+ // Bail out if that is so.
+ Node* not_objArray = generate_non_objArray_guard(klass_node, bailout);
+ if (not_objArray != NULL) {
+ // Improve the klass node's type from the new optimistic assumption:
+ ciKlass* ak = ciArrayKlass::make(env()->Object_klass());
+ const Type* akls = TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
+ Node* cast = new (C, 2) CastPPNode(klass_node, akls);
+ cast->init_req(0, control());
+ klass_node = _gvn.transform(cast);
+ }
+
+ // Bail out if either start or end is negative.
+ generate_negative_guard(start, bailout, &start);
+ generate_negative_guard(end, bailout, &end);
+
+ Node* length = end;
+ if (_gvn.type(start) != TypeInt::ZERO) {
+ length = _gvn.transform( new (C, 3) SubINode(end, start) );
+ }
+
+ // Bail out if length is negative.
+ // ...Not needed, since the new_array will throw the right exception.
+ //generate_negative_guard(length, bailout, &length);
+
+ if (bailout->req() > 1) {
+ PreserveJVMState pjvms(this);
+ set_control( _gvn.transform(bailout) );
+ _sp += nargs; // push the arguments back on the stack
+ uncommon_trap(Deoptimization::Reason_intrinsic,
+ Deoptimization::Action_maybe_recompile);
+ }
+
+ if (!stopped()) {
+ // How many elements will we copy from the original?
+ // The answer is MinI(orig_length - start, length).
+ Node* orig_tail = _gvn.transform( new(C, 3) SubINode(orig_length, start) );
+ Node* moved = generate_min_max(vmIntrinsics::_min, orig_tail, length);
+
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* newcopy = new_array(klass_node, length);
+ _sp -= nargs;
+
+ // Generate a direct call to the right arraycopy function(s).
+ // We know the copy is disjoint but we might not know if the
+ // oop stores need checking.
+ // Extreme case: Arrays.copyOf((Integer[])x, 10, String[].class).
+ // This will fail a store-check if x contains any non-nulls.
+ bool disjoint_bases = true;
+ bool length_never_negative = true;
+ generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+ original, start, newcopy, intcon(0), moved,
+ nargs, disjoint_bases, length_never_negative);
+
+ push(newcopy);
+ }
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ return true;
+}
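+
+// Aside: the length arithmetic above in isolation. For copyOfRange(original,
+// from, to) the new array has to - from elements, but only
+// min(original.length - from, to - from) of them are copied; any tail keeps its
+// default value. A sketch; elements_to_copy is a made-up name.
+#if 0 // illustration only
+static int elements_to_copy(int orig_length, int from, int to) {
+  int new_length = to - from;           // length of the copy
+  int orig_tail  = orig_length - from;  // what the original can still supply
+  return orig_tail < new_length ? orig_tail : new_length;
+}
+#endif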
+
+
+//----------------------generate_virtual_guard---------------------------
+// Helper for hashCode and clone. Peeks inside the vtable to avoid a call.
+Node* LibraryCallKit::generate_virtual_guard(Node* obj_klass,
+ RegionNode* slow_region) {
+ ciMethod* method = callee();
+ int vtable_index = method->vtable_index();
+ // Get the methodOop out of the appropriate vtable entry.
+ int entry_offset = (instanceKlass::vtable_start_offset() +
+ vtable_index*vtableEntry::size()) * wordSize +
+ vtableEntry::method_offset_in_bytes();
+ Node* entry_addr = basic_plus_adr(obj_klass, entry_offset);
+ Node* target_call = make_load(NULL, entry_addr, TypeInstPtr::NOTNULL, T_OBJECT);
+
+ // Compare the target method with the expected method (e.g., Object.hashCode).
+ const TypeInstPtr* native_call_addr = TypeInstPtr::make(method);
+
+ Node* native_call = makecon(native_call_addr);
+ Node* chk_native = _gvn.transform( new(C, 3) CmpPNode(target_call, native_call) );
+ Node* test_native = _gvn.transform( new(C, 2) BoolNode(chk_native, BoolTest::ne) );
+
+ return generate_slow_guard(test_native, slow_region);
+}
+
+//-----------------------generate_method_call----------------------------
+// Use generate_method_call to make a slow-call to the real
+// method if the fast path fails. An alternative would be to
+// use a stub like OptoRuntime::slow_arraycopy_Java.
+// This only works for expanding the current library call,
+// not another intrinsic. (E.g., don't use this for making an
+// arraycopy call inside of the copyOf intrinsic.)
+CallJavaNode*
+LibraryCallKit::generate_method_call(vmIntrinsics::ID method_id, bool is_virtual, bool is_static) {
+ // When compiling the intrinsic method itself, do not use this technique.
+ guarantee(callee() != C->method(), "cannot make slow-call to self");
+
+ ciMethod* method = callee();
+ // ensure the JVMS we have will be correct for this call
+ guarantee(method_id == method->intrinsic_id(), "must match");
+
+ const TypeFunc* tf = TypeFunc::make(method);
+ int tfdc = tf->domain()->cnt();
+ CallJavaNode* slow_call;
+ if (is_static) {
+ assert(!is_virtual, "");
+ slow_call = new(C, tfdc) CallStaticJavaNode(tf,
+ SharedRuntime::get_resolve_static_call_stub(),
+ method, bci());
+ } else if (is_virtual) {
+ null_check_receiver(method);
+ int vtable_index = methodOopDesc::invalid_vtable_index;
+ if (UseInlineCaches) {
+ // Suppress the vtable call
+ } else {
+ // hashCode and clone are not miranda methods,
+ // so the vtable index is fixed.
+ // No need to use the linkResolver to get it.
+ vtable_index = method->vtable_index();
+ }
+ slow_call = new(C, tfdc) CallDynamicJavaNode(tf,
+ SharedRuntime::get_resolve_virtual_call_stub(),
+ method, vtable_index, bci());
+ } else { // neither virtual nor static: opt_virtual
+ null_check_receiver(method);
+ slow_call = new(C, tfdc) CallStaticJavaNode(tf,
+ SharedRuntime::get_resolve_opt_virtual_call_stub(),
+ method, bci());
+ slow_call->set_optimized_virtual(true);
+ }
+ set_arguments_for_java_call(slow_call);
+ set_edges_for_java_call(slow_call);
+ return slow_call;
+}
+
+
+//------------------------------inline_native_hashcode--------------------
+// Build special case code for calls to hashCode on an object.
+bool LibraryCallKit::inline_native_hashcode(bool is_virtual, bool is_static) {
+ assert(is_static == callee()->is_static(), "correct intrinsic selection");
+ assert(!(is_virtual && is_static), "either virtual, special, or static");
+
+ enum { _slow_path = 1, _fast_path, _null_path, PATH_LIMIT };
+
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInt::INT);
+ PhiNode* result_io = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+ Node* obj = NULL;
+ if (!is_static) {
+ // Check for hashing null object
+ obj = null_check_receiver(callee());
+ if (stopped()) return true; // unconditionally null
+ result_reg->init_req(_null_path, top());
+ result_val->init_req(_null_path, top());
+ } else {
+ // Do a null check, and return zero if null.
+ // System.identityHashCode(null) == 0
+ obj = argument(0);
+ Node* null_ctl = top();
+ obj = null_check_oop(obj, &null_ctl);
+ result_reg->init_req(_null_path, null_ctl);
+ result_val->init_req(_null_path, _gvn.intcon(0));
+ }
+
+ // Unconditionally null? Then return right away.
+ if (stopped()) {
+ set_control( result_reg->in(_null_path) );
+ if (!stopped())
+ push( result_val ->in(_null_path) );
+ return true;
+ }
+
+ // After null check, get the object's klass.
+ Node* obj_klass = load_object_klass(obj);
+
+ // This call may be virtual (invokevirtual) or bound (invokespecial).
+ // For each case we generate slightly different code.
+
+ // We only go to the fast case code if we pass a number of guards. The
+ // paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+
+ // If this is a virtual call, we generate a funny guard. We pull out
+ // the vtable entry corresponding to hashCode() from the target object.
+ // If the target method which we are calling happens to be the native
+ // Object hashCode() method, we pass the guard. We do not need this
+ // guard for non-virtual calls -- the caller is known to be the native
+ // Object hashCode().
+ if (is_virtual) {
+ generate_virtual_guard(obj_klass, slow_region);
+ }
+
+ // Get the header out of the object, use LoadMarkNode when available
+ Node* header_addr = basic_plus_adr(obj, oopDesc::mark_offset_in_bytes());
+ Node* header = make_load(NULL, header_addr, TypeRawPtr::BOTTOM, T_ADDRESS);
+ header = _gvn.transform( new (C, 2) CastP2XNode(NULL, header) );
+
+ // Test the header to see if it is unlocked.
+ Node *lock_mask = _gvn.MakeConX(markOopDesc::biased_lock_mask_in_place);
+ Node *lmasked_header = _gvn.transform( new (C, 3) AndXNode(header, lock_mask) );
+ Node *unlocked_val = _gvn.MakeConX(markOopDesc::unlocked_value);
+ Node *chk_unlocked = _gvn.transform( new (C, 3) CmpXNode( lmasked_header, unlocked_val));
+ Node *test_unlocked = _gvn.transform( new (C, 2) BoolNode( chk_unlocked, BoolTest::ne) );
+
+ generate_slow_guard(test_unlocked, slow_region);
+
+ // Get the hash value and check to see that it has been properly assigned.
+ // We depend on hash_mask being at most 32 bits and avoid the use of
+ // hash_mask_in_place because it could be larger than 32 bits in a 64-bit
+ // vm: see markOop.hpp.
+ Node *hash_mask = _gvn.intcon(markOopDesc::hash_mask);
+ Node *hash_shift = _gvn.intcon(markOopDesc::hash_shift);
+ Node *hshifted_header= _gvn.transform( new (C, 3) URShiftXNode(header, hash_shift) );
+ // This hack lets the hash bits live anywhere in the mark object now, as long
+ // as the shift drops the relevant bits into the low 32 bits. Note that
+ // Java spec says that HashCode is an int so there's no point in capturing
+ // an 'X'-sized hashcode (32 in 32-bit build or 64 in 64-bit build).
+ hshifted_header = ConvX2I(hshifted_header);
+ Node *hash_val = _gvn.transform( new (C, 3) AndINode(hshifted_header, hash_mask) );
+
+ Node *no_hash_val = _gvn.intcon(markOopDesc::no_hash);
+ Node *chk_assigned = _gvn.transform( new (C, 3) CmpINode( hash_val, no_hash_val));
+ Node *test_assigned = _gvn.transform( new (C, 2) BoolNode( chk_assigned, BoolTest::eq) );
+
+ generate_slow_guard(test_assigned, slow_region);
+
+ Node* init_mem = reset_memory();
+ // fill in the rest of the null path:
+ result_io ->init_req(_null_path, i_o());
+ result_mem->init_req(_null_path, init_mem);
+
+ result_val->init_req(_fast_path, hash_val);
+ result_reg->init_req(_fast_path, control());
+ result_io ->init_req(_fast_path, i_o());
+ result_mem->init_req(_fast_path, init_mem);
+
+ // Generate code for the slow case. We make a call to hashCode().
+ set_control(_gvn.transform(slow_region));
+ if (!stopped()) {
+ // No need for PreserveJVMState, because we're using up the present state.
+ set_all_memory(init_mem);
+ vmIntrinsics::ID hashCode_id = vmIntrinsics::_hashCode;
+ if (is_static) hashCode_id = vmIntrinsics::_identityHashCode;
+ CallJavaNode* slow_call = generate_method_call(hashCode_id, is_virtual, is_static);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->init_req(_slow_path, control());
+ result_val->init_req(_slow_path, slow_result);
+ result_io ->set_req(_slow_path, i_o());
+ result_mem ->set_req(_slow_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_i_o( _gvn.transform(result_io) );
+ set_all_memory( _gvn.transform(result_mem) );
+ push_result(result_reg, result_val);
+
+ return true;
+}
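+
+// Aside: the fast path above reduced to its arithmetic: shift the hash bits
+// down and mask them to an int; a result equal to no_hash means the slow call
+// is needed. The shift and mask below are placeholders, not the real markOop
+// constants (which differ between 32- and 64-bit builds).
+#if 0 // illustration only
+#include <stdint.h>
+
+static const int      kHashShift = 8;          // placeholder value
+static const uint32_t kHashMask  = 0x7fffffff; // placeholder value
+
+static uint32_t hash_from_header(uintptr_t header) {
+  return (uint32_t)(header >> kHashShift) & kHashMask;
+}
+#endif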
+
+//---------------------------inline_native_getClass----------------------------
+// Build special case code for calls to getClass on an object.
+bool LibraryCallKit::inline_native_getClass() {
+ Node* obj = null_check_receiver(callee());
+ if (stopped()) return true;
+ push( load_mirror_from_klass(load_object_klass(obj)) );
+ return true;
+}
+
+//-----------------inline_native_Reflection_getCallerClass---------------------
+// In the presence of deep enough inlining, getCallerClass() becomes a no-op.
+//
+// NOTE that this code must perform the same logic as
+// vframeStream::security_get_caller_frame in that it must skip
+// Method.invoke() and auxiliary frames.
+
+
+
+
+bool LibraryCallKit::inline_native_Reflection_getCallerClass() {
+ ciMethod* method = callee();
+
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr("Attempting to inline sun.reflect.Reflection.getCallerClass");
+ }
+#endif
+
+ debug_only(int saved_sp = _sp);
+
+ // Argument words: (int depth)
+ int nargs = 1;
+
+ _sp += nargs;
+ Node* caller_depth_node = pop();
+
+ assert(saved_sp == _sp, "must have correct argument count");
+
+ // The depth value must be a constant in order for the runtime call
+ // to be eliminated.
+ const TypeInt* caller_depth_type = _gvn.type(caller_depth_node)->isa_int();
+ if (caller_depth_type == NULL || !caller_depth_type->is_con()) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth was not a constant");
+ }
+#endif
+ return false;
+ }
+ // Note that the JVM state at this point does not include the
+ // getCallerClass() frame which we are trying to inline. The
+ // semantics of getCallerClass(), however, are that the "first"
+ // frame is the getCallerClass() frame, so we subtract one from the
+ // requested depth before continuing. We don't inline requests of
+ // getCallerClass(0).
+ int caller_depth = caller_depth_type->get_con() - 1;
+ if (caller_depth < 0) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth was %d", caller_depth);
+ }
+#endif
+ return false;
+ }
+
+ if (!jvms()->has_method()) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because intrinsic was inlined at top level");
+ }
+#endif
+ return false;
+ }
+ int _depth = jvms()->depth(); // cache call chain depth
+
+ // Walk back up the JVM state to find the caller at the required
+ // depth. NOTE that this code must perform the same logic as
+ // vframeStream::security_get_caller_frame in that it must skip
+ // Method.invoke() and auxiliary frames. Note also that depth is
+ // 1-based (1 is the bottom of the inlining).
+ int inlining_depth = _depth;
+ JVMState* caller_jvms = NULL;
+
+ if (inlining_depth > 0) {
+ caller_jvms = jvms();
+ assert(caller_jvms == jvms()->of_depth(inlining_depth), "inlining_depth == our depth");
+ do {
+ // The following if-tests should be performed in this order
+ if (is_method_invoke_or_aux_frame(caller_jvms)) {
+ // Skip a Method.invoke() or auxiliary frame
+ } else if (caller_depth > 0) {
+ // Skip real frame
+ --caller_depth;
+ } else {
+ // We're done: reached desired caller after skipping.
+ break;
+ }
+ caller_jvms = caller_jvms->caller();
+ --inlining_depth;
+ } while (inlining_depth > 0);
+ }
+
+ if (inlining_depth == 0) {
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Bailing out because caller depth (%d) exceeded inlining depth (%d)", caller_depth_type->get_con(), _depth);
+ tty->print_cr(" JVM state at this point:");
+ for (int i = _depth; i >= 1; i--) {
+ tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+ }
+ }
+#endif
+ return false; // Reached end of inlining
+ }
+
+ // Acquire method holder as java.lang.Class
+ ciInstanceKlass* caller_klass = caller_jvms->method()->holder();
+ ciInstance* caller_mirror = caller_klass->java_mirror();
+ // Push this as a constant
+ push(makecon(TypeInstPtr::make(caller_mirror)));
+#ifndef PRODUCT
+ if ((PrintIntrinsics || PrintInlining || PrintOptoInlining) && Verbose) {
+ tty->print_cr(" Succeeded: caller = %s.%s, caller depth = %d, depth = %d", caller_klass->name()->as_utf8(), caller_jvms->method()->name()->as_utf8(), caller_depth_type->get_con(), _depth);
+ tty->print_cr(" JVM state at this point:");
+ for (int i = _depth; i >= 1; i--) {
+ tty->print_cr(" %d) %s", i, jvms()->of_depth(i)->method()->name()->as_utf8());
+ }
+ }
+#endif
+ return true;
+}
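+
+// Aside: the walk above over the inlined call chain, as a loop over an explicit
+// frame list (innermost first). A sketch only; the Frame struct and
+// find_caller_index are made up, and "reflection frame" means the Method.invoke
+// and MethodAccessorImpl frames recognized by the helper routine below.
+#if 0 // illustration only
+struct Frame { bool is_reflection_frame; };
+
+static int find_caller_index(const Frame* frames, int count, int depth) {
+  for (int i = 0; i < count; i++) {
+    if (frames[i].is_reflection_frame) continue; // skipped, consumes no depth
+    if (depth-- == 0) return i;                  // reached the requested caller
+  }
+  return -1;                                     // ran past the inlined frames
+}
+#endif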
+
+// Helper routine for above
+bool LibraryCallKit::is_method_invoke_or_aux_frame(JVMState* jvms) {
+ // Is this the Method.invoke method itself?
+ if (jvms->method()->intrinsic_id() == vmIntrinsics::_invoke)
+ return true;
+
+ // Is this a helper, defined somewhere underneath MethodAccessorImpl.
+ ciKlass* k = jvms->method()->holder();
+ if (k->is_instance_klass()) {
+ ciInstanceKlass* ik = k->as_instance_klass();
+ for (; ik != NULL; ik = ik->super()) {
+ if (ik->name() == ciSymbol::sun_reflect_MethodAccessorImpl() &&
+ ik == env()->find_system_klass(ik->name())) {
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+static int value_field_offset = -1; // offset of the "value" field of AtomicLongCSImpl. This is needed by
+ // inline_native_AtomicLong_attemptUpdate() but it has no way of
+ // computing it since there is no lookup-field-by-name function in the
+ // CI interface. This is computed and set by inline_native_AtomicLong_get().
+ // Using a static variable here is safe even if we have multiple compilation
+ // threads because the offset is constant. At worst the same offset will be
+ // computed and stored multiple times.
+
+bool LibraryCallKit::inline_native_AtomicLong_get() {
+ // Restore the stack and pop off the argument
+ _sp+=1;
+ Node *obj = pop();
+
+ // get the offset of the "value" field. Since the CI interface
+ // does not provide a way to look up a field by name, we scan the bytecodes
+ // to get the field index. We expect the first 2 instructions of the method
+ // to be:
+ // 0 aload_0
+ // 1 getfield "value"
+ ciMethod* method = callee();
+ if (value_field_offset == -1)
+ {
+ ciField* value_field;
+ ciBytecodeStream iter(method);
+ Bytecodes::Code bc = iter.next();
+
+ if ((bc != Bytecodes::_aload_0) &&
+ ((bc != Bytecodes::_aload) || (iter.get_index() != 0)))
+ return false;
+ bc = iter.next();
+ if (bc != Bytecodes::_getfield)
+ return false;
+ bool ignore;
+ value_field = iter.get_field(ignore);
+ value_field_offset = value_field->offset_in_bytes();
+ }
+
+ // Null check without removing any arguments.
+ _sp++;
+ obj = do_null_check(obj, T_OBJECT);
+ _sp--;
+ // Check for locking null object
+ if (stopped()) return true;
+
+ Node *adr = basic_plus_adr(obj, obj, value_field_offset);
+ const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
+ int alias_idx = C->get_alias_index(adr_type);
+
+ Node *result = _gvn.transform(new (C, 3) LoadLLockedNode(control(), memory(alias_idx), adr));
+
+ push_pair(result);
+
+ return true;
+}
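+
+// Aside: the shape of the bytecode-prefix check above, over raw bytecode bytes.
+// A sketch only; the real code uses ciBytecodeStream, which also accepts the
+// long form "aload 0". Opcode values: aload_0 = 0x2a, getfield = 0xb4, and
+// getfield carries a two-byte constant-pool index.
+#if 0 // illustration only
+#include <stdint.h>
+
+static bool starts_with_aload0_getfield(const uint8_t* code, int len) {
+  return len >= 4 && code[0] == 0x2a && code[1] == 0xb4;
+}
+#endif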
+
+bool LibraryCallKit::inline_native_AtomicLong_attemptUpdate() {
+ // Restore the stack and pop off the arguments
+ _sp+=5;
+ Node *newVal = pop_pair();
+ Node *oldVal = pop_pair();
+ Node *obj = pop();
+
+ // we need the offset of the "value" field which was computed when
+ // inlining the get() method. Give up if we don't have it.
+ if (value_field_offset == -1)
+ return false;
+
+ // Null check without removing any arguments.
+ _sp+=5;
+ obj = do_null_check(obj, T_OBJECT);
+ _sp-=5;
+ // Check for locking null object
+ if (stopped()) return true;
+
+ Node *adr = basic_plus_adr(obj, obj, value_field_offset);
+ const TypePtr *adr_type = _gvn.type(adr)->is_ptr();
+ int alias_idx = C->get_alias_index(adr_type);
+
+ Node *result = _gvn.transform(new (C, 5) StoreLConditionalNode(control(), memory(alias_idx), adr, newVal, oldVal));
+ Node *store_proj = _gvn.transform( new (C, 1) SCMemProjNode(result));
+ set_memory(store_proj, alias_idx);
+
+ push(result);
+ return true;
+}
+
+bool LibraryCallKit::inline_fp_conversions(vmIntrinsics::ID id) {
+ // restore the arguments
+ _sp += arg_size();
+
+ switch (id) {
+ case vmIntrinsics::_floatToRawIntBits:
+ push(_gvn.transform( new (C, 2) MoveF2INode(pop())));
+ break;
+
+ case vmIntrinsics::_intBitsToFloat:
+ push(_gvn.transform( new (C, 2) MoveI2FNode(pop())));
+ break;
+
+ case vmIntrinsics::_doubleToRawLongBits:
+ push_pair(_gvn.transform( new (C, 2) MoveD2LNode(pop_pair())));
+ break;
+
+ case vmIntrinsics::_longBitsToDouble:
+ push_pair(_gvn.transform( new (C, 2) MoveL2DNode(pop_pair())));
+ break;
+
+ case vmIntrinsics::_doubleToLongBits: {
+ Node* value = pop_pair();
+
+ // two paths (plus control) merge in a wood
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r, TypeLong::LONG);
+
+ Node *cmpisnan = _gvn.transform( new (C, 3) CmpDNode(value, value));
+ // Build the boolean node
+ Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
+
+ // Branch either way.
+ // NaN case is less traveled, which makes all the difference.
+ IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ Node *opt_isnan = _gvn.transform(ifisnan);
+ assert( opt_isnan->is_If(), "Expect an IfNode");
+ IfNode *opt_ifisnan = (IfNode*)opt_isnan;
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
+
+ set_control(iftrue);
+
+ static const jlong nan_bits = CONST64(0x7ff8000000000000);
+ Node *slow_result = longcon(nan_bits); // return NaN
+ phi->init_req(1, _gvn.transform( slow_result ));
+ r->init_req(1, iftrue);
+
+ // Else fall through
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
+ set_control(iffalse);
+
+ phi->init_req(2, _gvn.transform( new (C, 2) MoveD2LNode(value)));
+ r->init_req(2, iffalse);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+
+ Node* result = _gvn.transform(phi);
+ assert(result->bottom_type()->isa_long(), "must be");
+ push_pair(result);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ break;
+ }
+
+ case vmIntrinsics::_floatToIntBits: {
+ Node* value = pop();
+
+ // two paths (plus control) merge in a wood
+ RegionNode *r = new (C, 3) RegionNode(3);
+ Node *phi = new (C, 3) PhiNode(r, TypeInt::INT);
+
+ Node *cmpisnan = _gvn.transform( new (C, 3) CmpFNode(value, value));
+ // Build the boolean node
+ Node *bolisnan = _gvn.transform( new (C, 2) BoolNode( cmpisnan, BoolTest::ne ) );
+
+ // Branch either way.
+ // NaN case is less traveled, which makes all the difference.
+ IfNode *ifisnan = create_and_xform_if(control(), bolisnan, PROB_STATIC_FREQUENT, COUNT_UNKNOWN);
+ Node *opt_isnan = _gvn.transform(ifisnan);
+ assert( opt_isnan->is_If(), "Expect an IfNode");
+ IfNode *opt_ifisnan = (IfNode*)opt_isnan;
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(opt_ifisnan) );
+
+ set_control(iftrue);
+
+ static const jint nan_bits = 0x7fc00000;
+ Node *slow_result = makecon(TypeInt::make(nan_bits)); // return NaN
+ phi->init_req(1, _gvn.transform( slow_result ));
+ r->init_req(1, iftrue);
+
+ // Else fall through
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(opt_ifisnan) );
+ set_control(iffalse);
+
+ phi->init_req(2, _gvn.transform( new (C, 2) MoveF2INode(value)));
+ r->init_req(2, iffalse);
+
+ // Post merge
+ set_control(_gvn.transform(r));
+ record_for_igvn(r);
+
+ Node* result = _gvn.transform(phi);
+ assert(result->bottom_type()->isa_int(), "must be");
+ push(result);
+
+ C->set_has_split_ifs(true); // Has chance for split-if optimization
+
+ break;
+ }
+
+ default:
+ ShouldNotReachHere();
+ }
+
+ return true;
+}
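+
+// Aside: the Java-level semantics the _doubleToLongBits branch implements,
+// standalone: every NaN collapses to the one canonical bit pattern, while the
+// raw variant (the Move node alone) would preserve the NaN payload. The
+// "v != v" test mirrors the CmpD(value, value) / BoolTest::ne idiom above.
+// A sketch, not VM code.
+#if 0 // illustration only
+#include <stdint.h>
+#include <string.h>
+
+static int64_t double_to_long_bits(double v) {
+  if (v != v) return (int64_t)0x7ff8000000000000LL; // NaN => canonical bits
+  int64_t bits;
+  memcpy(&bits, &v, sizeof(bits));                  // raw bit copy
+  return bits;
+}
+#endif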
+
+#ifdef _LP64
+#define XTOP ,top() /*additional argument*/
+#else //_LP64
+#define XTOP /*no additional argument*/
+#endif //_LP64
+
+//----------------------inline_unsafe_copyMemory-------------------------
+bool LibraryCallKit::inline_unsafe_copyMemory() {
+ if (callee()->is_static()) return false; // caller must have the capability!
+ int nargs = 1 + 5 + 3; // 5 args: (src: ptr,off, dst: ptr,off, size)
+ assert(signature()->size() == nargs-1, "copy has 5 arguments");
+ null_check_receiver(callee()); // check then ignore argument(0)
+ if (stopped()) return true;
+
+ C->set_has_unsafe_access(true); // Mark eventual nmethod as "unsafe".
+
+ Node* src_ptr = argument(1);
+ Node* src_off = ConvL2X(argument(2));
+ assert(argument(3)->is_top(), "2nd half of long");
+ Node* dst_ptr = argument(4);
+ Node* dst_off = ConvL2X(argument(5));
+ assert(argument(6)->is_top(), "2nd half of long");
+ Node* size = ConvL2X(argument(7));
+ assert(argument(8)->is_top(), "2nd half of long");
+
+ assert(Unsafe_field_offset_to_byte_offset(11) == 11,
+ "fieldOffset must be byte-scaled");
+
+ Node* src = make_unsafe_address(src_ptr, src_off);
+ Node* dst = make_unsafe_address(dst_ptr, dst_off);
+
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes of the copy source or destination float below the copy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // Call it. Note that the length argument is not scaled.
+ make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::fast_arraycopy_Type(),
+ StubRoutines::unsafe_arraycopy(),
+ "unsafe_arraycopy",
+ TypeRawPtr::BOTTOM,
+ src, dst, size XTOP);
+
+ // Do not let reads of the copy destination float above the copy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ return true;
+}
+
+
+//------------------------inline_native_clone----------------------------
+// Here are the simple edge cases:
+// null receiver => normal trap
+// virtual and clone was overridden => slow path to out-of-line clone
+// not cloneable or finalizer => slow path to out-of-line Object.clone
+//
+// The general case has two steps, allocation and copying.
+// Allocation has two cases, and uses GraphKit::new_instance or new_array.
+//
+// Copying also has two cases, oop arrays and everything else.
+// Oop arrays use arrayof_oop_arraycopy (same as System.arraycopy).
+// Everything else uses the tight inline loop supplied by CopyArrayNode.
+//
+// These steps fold up nicely if and when the cloned object's klass
+// can be sharply typed as an object array, a type array, or an instance.
+//
+bool LibraryCallKit::inline_native_clone(bool is_virtual) {
+ int nargs = 1;
+ Node* obj = null_check_receiver(callee());
+ if (stopped()) return true;
+ Node* obj_klass = load_object_klass(obj);
+ const TypeKlassPtr* tklass = _gvn.type(obj_klass)->isa_klassptr();
+ const TypeOopPtr* toop = ((tklass != NULL)
+ ? tklass->as_instance_type()
+ : TypeInstPtr::NOTNULL);
+
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes into the original float below the clone.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // paths into result_reg:
+ enum {
+ _slow_path = 1, // out-of-line call to clone method (virtual or not)
+ _objArray_path, // plain allocation, plus arrayof_oop_arraycopy
+ _fast_path, // plain allocation, plus a CopyArray operation
+ PATH_LIMIT
+ };
+ RegionNode* result_reg = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_val = new(C, PATH_LIMIT) PhiNode(result_reg,
+ TypeInstPtr::NOTNULL);
+ PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_reg, Type::ABIO);
+ PhiNode* result_mem = new(C, PATH_LIMIT) PhiNode(result_reg, Type::MEMORY,
+ TypePtr::BOTTOM);
+ record_for_igvn(result_reg);
+
+ const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+ int raw_adr_idx = Compile::AliasIdxRaw;
+ const bool raw_mem_only = true;
+
+ // paths into alloc_reg (on the fast path, just before the CopyArray):
+ enum { _typeArray_alloc = 1, _instance_alloc, ALLOC_LIMIT };
+ RegionNode* alloc_reg = new(C, ALLOC_LIMIT) RegionNode(ALLOC_LIMIT);
+ PhiNode* alloc_val = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, raw_adr_type);
+ PhiNode* alloc_siz = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, TypeX_X);
+ PhiNode* alloc_i_o = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::ABIO);
+ PhiNode* alloc_mem = new(C, ALLOC_LIMIT) PhiNode(alloc_reg, Type::MEMORY,
+ raw_adr_type);
+ record_for_igvn(alloc_reg);
+
+ bool card_mark = false; // (see below)
+
+ Node* array_ctl = generate_array_guard(obj_klass, (RegionNode*)NULL);
+ if (array_ctl != NULL) {
+ // It's an array.
+ PreserveJVMState pjvms(this);
+ set_control(array_ctl);
+ Node* obj_length = load_array_length(obj);
+ Node* obj_size = NULL;
+ _sp += nargs; // set original stack for use by uncommon_trap
+ Node* alloc_obj = new_array(obj_klass, obj_length,
+ raw_mem_only, &obj_size);
+ _sp -= nargs;
+ assert(obj_size != NULL, "");
+ Node* raw_obj = alloc_obj->in(1);
+ assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+ if (ReduceBulkZeroing) {
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+ if (alloc != NULL) {
+ // We will be completely responsible for initializing this object.
+ alloc->maybe_set_complete(&_gvn);
+ }
+ }
+
+ if (!use_ReduceInitialCardMarks()) {
+ // If it is an oop array, it requires very special treatment,
+ // because card marking is required on each card of the array.
+ Node* is_obja = generate_objArray_guard(obj_klass, (RegionNode*)NULL);
+ if (is_obja != NULL) {
+ PreserveJVMState pjvms2(this);
+ set_control(is_obja);
+ // Generate a direct call to the right arraycopy function(s).
+ bool disjoint_bases = true;
+ bool length_never_negative = true;
+ generate_arraycopy(TypeAryPtr::OOPS, T_OBJECT,
+ obj, intcon(0), alloc_obj, intcon(0),
+ obj_length, nargs,
+ disjoint_bases, length_never_negative);
+ result_reg->init_req(_objArray_path, control());
+ result_val->init_req(_objArray_path, alloc_obj);
+ result_i_o ->set_req(_objArray_path, i_o());
+ result_mem ->set_req(_objArray_path, reset_memory());
+ }
+ }
+ // We can dispense with card marks if we know the allocation
+ // comes out of eden (TLAB)... In fact, ReduceInitialCardMarks
+ // causes the non-eden paths to simulate a fresh allocation,
+ // insofar as no further card marks are required to initialize
+ // the object.
+
+ // Otherwise, there are no card marks to worry about.
+ alloc_val->init_req(_typeArray_alloc, raw_obj);
+ alloc_siz->init_req(_typeArray_alloc, obj_size);
+ alloc_reg->init_req(_typeArray_alloc, control());
+ alloc_i_o->init_req(_typeArray_alloc, i_o());
+ alloc_mem->init_req(_typeArray_alloc, memory(raw_adr_type));
+ }
+
+ // We only go to the fast case code if we pass a number of guards.
+ // The paths which do not pass are accumulated in the slow_region.
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+ if (!stopped()) {
+ // It's an instance. Make the slow-path tests.
+ // If this is a virtual call, we generate a funny guard. We grab
+ // the vtable entry corresponding to clone() from the target object.
+ // If the target method which we are calling happens to be the
+ // Object clone() method, we pass the guard. We do not need this
+ // guard for non-virtual calls; the caller is known to be the native
+ // Object clone().
+ if (is_virtual) {
+ generate_virtual_guard(obj_klass, slow_region);
+ }
+
+ // The object must be cloneable and must not have a finalizer.
+ // Both of these conditions may be checked in a single test.
+ // We could optimize the cloneable test further, but we don't care.
+ generate_access_flags_guard(obj_klass,
+ // Test both conditions:
+ JVM_ACC_IS_CLONEABLE | JVM_ACC_HAS_FINALIZER,
+ // Must be cloneable but not finalizer:
+ JVM_ACC_IS_CLONEABLE,
+ slow_region);
+ }
+
+ if (!stopped()) {
+ // It's an instance, and it passed the slow-path tests.
+ PreserveJVMState pjvms(this);
+ Node* obj_size = NULL;
+ Node* alloc_obj = new_instance(obj_klass, NULL, raw_mem_only, &obj_size);
+ assert(obj_size != NULL, "");
+ Node* raw_obj = alloc_obj->in(1);
+ assert(raw_obj->is_Proj() && raw_obj->in(0)->is_Allocate(), "");
+ if (ReduceBulkZeroing) {
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(alloc_obj, &_gvn);
+ if (alloc != NULL && !alloc->maybe_set_complete(&_gvn))
+ alloc = NULL;
+ }
+ if (!use_ReduceInitialCardMarks()) {
+ // Put in store barrier for any and all oops we are sticking
+ // into this object. (We could avoid this if we could prove
+ // that the object type contains no oop fields at all.)
+ card_mark = true;
+ }
+ alloc_val->init_req(_instance_alloc, raw_obj);
+ alloc_siz->init_req(_instance_alloc, obj_size);
+ alloc_reg->init_req(_instance_alloc, control());
+ alloc_i_o->init_req(_instance_alloc, i_o());
+ alloc_mem->init_req(_instance_alloc, memory(raw_adr_type));
+ }
+
+ // Generate code for the slow case. We make a call to clone().
+ set_control(_gvn.transform(slow_region));
+ if (!stopped()) {
+ PreserveJVMState pjvms(this);
+ CallJavaNode* slow_call = generate_method_call(vmIntrinsics::_clone, is_virtual);
+ Node* slow_result = set_results_for_java_call(slow_call);
+ // this->control() comes from set_results_for_java_call
+ result_reg->init_req(_slow_path, control());
+ result_val->init_req(_slow_path, slow_result);
+ result_i_o ->set_req(_slow_path, i_o());
+ result_mem ->set_req(_slow_path, reset_memory());
+ }
+
+ // The object is allocated, as an array and/or an instance. Now copy it.
+ set_control( _gvn.transform(alloc_reg) );
+ set_i_o( _gvn.transform(alloc_i_o) );
+ set_memory( _gvn.transform(alloc_mem), raw_adr_type );
+ Node* raw_obj = _gvn.transform(alloc_val);
+
+ if (!stopped()) {
+ // Copy the fastest available way.
+ // (No need for PreserveJVMState, since we're using it all up now.)
+ Node* src = obj;
+ Node* dest = raw_obj;
+ Node* end = dest;
+ Node* size = _gvn.transform(alloc_siz);
+
+ // Exclude the header.
+ int base_off = sizeof(oopDesc);
+ src = basic_plus_adr(src, base_off);
+ dest = basic_plus_adr(dest, base_off);
+ end = basic_plus_adr(end, size);
+
+ // Compute the length also, if needed:
+ Node* countx = size;
+ countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(base_off)) );
+ countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong) ));
+
+ // Select an appropriate instruction to initialize the range.
+ // The CopyArray instruction (if supported) can be optimized
+ // into a discrete set of scalar loads and stores.
+ bool disjoint_bases = true;
+ generate_unchecked_arraycopy(raw_adr_type, T_LONG, disjoint_bases,
+ src, NULL, dest, NULL, countx);
+
+ // Now that the object is properly initialized, type it as an oop.
+ // Use a secondary InitializeNode memory barrier.
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize, raw_adr_idx,
+ raw_obj)->as_Initialize();
+ init->set_complete(&_gvn); // (there is no corresponding AllocateNode)
+ Node* new_obj = new(C, 2) CheckCastPPNode(control(), raw_obj,
+ TypeInstPtr::NOTNULL);
+ new_obj = _gvn.transform(new_obj);
+
+ // If necessary, emit some card marks afterwards. (Non-arrays only.)
+ if (card_mark) {
+ Node* no_particular_value = NULL;
+ Node* no_particular_field = NULL;
+ post_barrier(control(),
+ memory(raw_adr_type),
+ new_obj,
+ no_particular_field,
+ raw_adr_idx,
+ no_particular_value,
+ T_OBJECT,
+ false);
+ }
+ // Present the results of the fast path.
+ result_reg->init_req(_fast_path, control());
+ result_val->init_req(_fast_path, new_obj);
+ result_i_o ->set_req(_fast_path, i_o());
+ result_mem ->set_req(_fast_path, reset_memory());
+ }
+
+ // Return the combined state.
+ set_control( _gvn.transform(result_reg) );
+ set_i_o( _gvn.transform(result_i_o) );
+ set_all_memory( _gvn.transform(result_mem) );
+
+ // Cast the result to a sharper type, since we know what clone does.
+ Node* new_obj = _gvn.transform(result_val);
+ Node* cast = new (C, 2) CheckCastPPNode(control(), new_obj, toop);
+ push(_gvn.transform(cast));
+
+ return true;
+}
+
+
+// constants for computing the copy function
+enum {
+ COPYFUNC_UNALIGNED = 0,
+ COPYFUNC_ALIGNED = 1, // src, dest aligned to HeapWordSize
+ COPYFUNC_CONJOINT = 0,
+ COPYFUNC_DISJOINT = 2 // src != dest, or transfer can descend
+};
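+
+// For illustration, select_arraycopy_function below combines these bits into
+// a selector in [0..3]; e.g. an aligned, disjoint request yields
+// COPYFUNC_ALIGNED + COPYFUNC_DISJOINT == 1 + 2 == 3, while an unaligned,
+// conjoint request yields 0.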
+
+// Note: The condition "disjoint" also applies to overlapping copies
+// where a descending copy is permitted (i.e., dest_offset <= src_offset).
+static address
+select_arraycopy_function(BasicType t, bool aligned, bool disjoint, const char* &name) {
+ int selector =
+ (aligned ? COPYFUNC_ALIGNED : COPYFUNC_UNALIGNED) +
+ (disjoint ? COPYFUNC_DISJOINT : COPYFUNC_CONJOINT);
+
+#define RETURN_STUB(xxx_arraycopy) { \
+ name = #xxx_arraycopy; \
+ return StubRoutines::xxx_arraycopy(); }
+
+ switch (t) {
+ case T_BYTE:
+ case T_BOOLEAN:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jbyte_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jbyte_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jbyte_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jbyte_disjoint_arraycopy);
+ }
+ case T_CHAR:
+ case T_SHORT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jshort_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jshort_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jshort_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jshort_disjoint_arraycopy);
+ }
+ case T_INT:
+ case T_FLOAT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jint_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jint_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jint_disjoint_arraycopy);
+ }
+ case T_DOUBLE:
+ case T_LONG:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jlong_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jlong_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(jlong_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_jlong_disjoint_arraycopy);
+ }
+ case T_ARRAY:
+ case T_OBJECT:
+ switch (selector) {
+ case COPYFUNC_CONJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(oop_arraycopy);
+ case COPYFUNC_CONJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_oop_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_UNALIGNED: RETURN_STUB(oop_disjoint_arraycopy);
+ case COPYFUNC_DISJOINT | COPYFUNC_ALIGNED: RETURN_STUB(arrayof_oop_disjoint_arraycopy);
+ }
+ default:
+ ShouldNotReachHere();
+ return NULL;
+ }
+
+#undef RETURN_STUB
+}
+
+//------------------------------basictype2arraycopy----------------------------
+address LibraryCallKit::basictype2arraycopy(BasicType t,
+ Node* src_offset,
+ Node* dest_offset,
+ bool disjoint_bases,
+ const char* &name) {
+ const TypeInt* src_offset_inttype = gvn().find_int_type(src_offset);
+ const TypeInt* dest_offset_inttype = gvn().find_int_type(dest_offset);
+
+ bool aligned = false;
+ bool disjoint = disjoint_bases;
+
+ // if the offsets are the same, we can treat the memory regions as
+ // disjoint, because either the memory regions are in different arrays,
+ // or they are identical (which we can treat as disjoint.) We can also
+// treat a copy with a destination index less than the source index
+ // as disjoint since a low->high copy will work correctly in this case.
+ if (src_offset_inttype != NULL && src_offset_inttype->is_con() &&
+ dest_offset_inttype != NULL && dest_offset_inttype->is_con()) {
+ // both indices are constants
+ int s_offs = src_offset_inttype->get_con();
+ int d_offs = dest_offset_inttype->get_con();
+ int element_size = type2aelembytes[t];
+ aligned = ((arrayOopDesc::base_offset_in_bytes(t) + s_offs * element_size) % HeapWordSize == 0) &&
+ ((arrayOopDesc::base_offset_in_bytes(t) + d_offs * element_size) % HeapWordSize == 0);
+ if (s_offs >= d_offs) disjoint = true;
+ } else if (src_offset == dest_offset && src_offset != NULL) {
+ // This can occur if the offsets are identical non-constants.
+ disjoint = true;
+ }
+
+ return select_arraycopy_function(t, aligned, disjoint, name);
+}
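+
+// Worked example (a sketch of the selection above, assuming the array base
+// offset for T_INT happens to be HeapWordSize-aligned): copying an int[] with
+// constant offsets s_offs == d_offs == 0 gives aligned == true and
+// disjoint == true (since s_offs >= d_offs), so the stub chosen is
+// StubRoutines::arrayof_jint_disjoint_arraycopy().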
+
+
+//------------------------------inline_arraycopy-----------------------
+bool LibraryCallKit::inline_arraycopy() {
+ // Restore the stack and pop off the arguments.
+ int nargs = 5; // 2 oops, 3 ints, no size_t or long
+ assert(callee()->signature()->size() == nargs, "copy has 5 arguments");
+
+ Node *src = argument(0);
+ Node *src_offset = argument(1);
+ Node *dest = argument(2);
+ Node *dest_offset = argument(3);
+ Node *length = argument(4);
+
+ // Compile time checks. If any of these checks cannot be verified at compile time,
+ // we do not make a fast path for this call. Instead, we let the call remain as it
+ // is. The checks we choose to mandate at compile time are:
+ //
+ // (1) src and dest are arrays.
+ const Type* src_type = src->Value(&_gvn);
+ const Type* dest_type = dest->Value(&_gvn);
+ const TypeAryPtr* top_src = src_type->isa_aryptr();
+ const TypeAryPtr* top_dest = dest_type->isa_aryptr();
+ if (top_src == NULL || top_src->klass() == NULL ||
+ top_dest == NULL || top_dest->klass() == NULL) {
+ // Conservatively insert a memory barrier on all memory slices.
+ // Do not let writes into the source float below the arraycopy.
+ insert_mem_bar(Op_MemBarCPUOrder);
+
+ // Call StubRoutines::generic_arraycopy stub.
+ generate_arraycopy(TypeRawPtr::BOTTOM, T_CONFLICT,
+ src, src_offset, dest, dest_offset, length,
+ nargs);
+
+ // Do not let reads from the destination float above the arraycopy.
+ // Since we cannot type the arrays, we don't know which slices
+ // might be affected. We could restrict this barrier only to those
+ // memory slices which pertain to array elements--but don't bother.
+ if (!InsertMemBarAfterArraycopy)
+ // (If InsertMemBarAfterArraycopy, there is already one in place.)
+ insert_mem_bar(Op_MemBarCPUOrder);
+ return true;
+ }
+
+ // (2) src and dest arrays must have elements of the same BasicType
+ // Figure out the size and type of the elements we will be copying.
+ BasicType src_elem = top_src->klass()->as_array_klass()->element_type()->basic_type();
+ BasicType dest_elem = top_dest->klass()->as_array_klass()->element_type()->basic_type();
+ if (src_elem == T_ARRAY) src_elem = T_OBJECT;
+ if (dest_elem == T_ARRAY) dest_elem = T_OBJECT;
+
+ if (src_elem != dest_elem || dest_elem == T_VOID) {
+ // The component types are not the same or are not recognized. Punt.
+ // (But, avoid the native method wrapper to JVM_ArrayCopy.)
+ generate_slow_arraycopy(TypePtr::BOTTOM,
+ src, src_offset, dest, dest_offset, length,
+ nargs);
+ return true;
+ }
+
+ //---------------------------------------------------------------------------
+ // We will make a fast path for this call to arraycopy.
+
+ // We have the following tests left to perform:
+ //
+ // (3) src and dest must not be null.
+ // (4) src_offset must not be negative.
+ // (5) dest_offset must not be negative.
+ // (6) length must not be negative.
+ // (7) src_offset + length must not exceed length of src.
+ // (8) dest_offset + length must not exceed length of dest.
+ // (9) each element of an oop array must be assignable
+
+ RegionNode* slow_region = new (C, 1) RegionNode(1);
+ record_for_igvn(slow_region);
+
+ // (3) operands must not be null
+ // We currently perform our null checks with the do_null_check routine.
+ // This means that the null exceptions will be reported in the caller
+ // rather than (correctly) reported inside of the native arraycopy call.
+ // This should be corrected, given time. We do our null check with the
+ // stack pointer restored.
+ _sp += nargs;
+ src = do_null_check(src, T_ARRAY);
+ dest = do_null_check(dest, T_ARRAY);
+ _sp -= nargs;
+
+ // (4) src_offset must not be negative.
+ generate_negative_guard(src_offset, slow_region);
+
+ // (5) dest_offset must not be negative.
+ generate_negative_guard(dest_offset, slow_region);
+
+ // (6) length must not be negative (moved to generate_arraycopy()).
+ // generate_negative_guard(length, slow_region);
+
+ // (7) src_offset + length must not exceed length of src.
+ generate_limit_guard(src_offset, length,
+ load_array_length(src),
+ slow_region);
+
+ // (8) dest_offset + length must not exceed length of dest.
+ generate_limit_guard(dest_offset, length,
+ load_array_length(dest),
+ slow_region);
+
+ // (9) each element of an oop array must be assignable
+ // The generate_arraycopy subroutine checks this.
+
+ // This is where the memory effects are placed:
+ const TypePtr* adr_type = TypeAryPtr::get_array_body_type(dest_elem);
+ generate_arraycopy(adr_type, dest_elem,
+ src, src_offset, dest, dest_offset, length,
+ nargs, false, false, slow_region);
+
+ return true;
+}
+
+//-----------------------------generate_arraycopy----------------------
+// Generate an optimized call to arraycopy.
+// Caller must guard against non-arrays.
+// Caller must determine a common array basic-type for both arrays.
+// Caller must validate offsets against array bounds.
+// The slow_region has already collected guard failure paths
+// (such as out of bounds length or non-conformable array types).
+// The generated code has this shape, in general:
+//
+// if (length == 0) return // via zero_path
+// slowval = -1
+// if (types unknown) {
+// slowval = call generic copy loop
+// if (slowval == 0) return // via checked_path
+// } else if (indexes in bounds) {
+// if ((is object array) && !(array type check)) {
+// slowval = call checked copy loop
+// if (slowval == 0) return // via checked_path
+// } else {
+// call bulk copy loop
+// return // via fast_path
+// }
+// }
+// // adjust params for remaining work:
+// if (slowval != -1) {
+// n = -1^slowval; src_offset += n; dest_offset += n; length -= n
+// }
+// slow_region:
+// call slow arraycopy(src, src_offset, dest, dest_offset, length)
+// return // via slow_call_path
+//
+// This routine is used from several intrinsics: System.arraycopy,
+// Object.clone (the array subcase), and Arrays.copyOf[Range].
+//
+void
+LibraryCallKit::generate_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs,
+ bool disjoint_bases,
+ bool length_never_negative,
+ RegionNode* slow_region) {
+
+ if (slow_region == NULL) {
+ slow_region = new(C,1) RegionNode(1);
+ record_for_igvn(slow_region);
+ }
+
+ Node* original_dest = dest;
+ AllocateArrayNode* alloc = NULL; // used for zeroing, if needed
+ Node* raw_dest = NULL; // used before zeroing, if needed
+ bool must_clear_dest = false;
+
+ // See if this is the initialization of a newly-allocated array.
+ // If so, we will take responsibility here for initializing it to zero.
+ // (Note: Because tightly_coupled_allocation performs checks on the
+ // out-edges of the dest, we need to avoid making derived pointers
+ // from it until we have checked its uses.)
+ if (ReduceBulkZeroing
+ && !ZeroTLAB // pointless if already zeroed
+ && basic_elem_type != T_CONFLICT // avoid corner case
+ && !_gvn.eqv_uncast(src, dest)
+ && ((alloc = tightly_coupled_allocation(dest, slow_region))
+ != NULL)
+ && alloc->maybe_set_complete(&_gvn)) {
+ // "You break it, you buy it."
+ InitializeNode* init = alloc->initialization();
+ assert(init->is_complete(), "we just did this");
+ assert(dest->Opcode() == Op_CheckCastPP, "sanity");
+ assert(dest->in(0)->in(0) == init, "dest pinned");
+ raw_dest = dest->in(1); // grab the raw pointer!
+ original_dest = dest;
+ dest = raw_dest;
+ adr_type = TypeRawPtr::BOTTOM; // all initializations are into raw memory
+ // Decouple the original InitializeNode, turning it into a simple membar.
+ // We will build a new one at the end of this routine.
+ init->set_req(InitializeNode::RawAddress, top());
+ // From this point on, every exit path is responsible for
+ // initializing any non-copied parts of the object to zero.
+ must_clear_dest = true;
+ } else {
+ // No zeroing elimination here.
+ alloc = NULL;
+ //original_dest = dest;
+ //must_clear_dest = false;
+ }
+
+ // Results are placed here:
+ enum { fast_path = 1, // normal void-returning assembly stub
+ checked_path = 2, // special assembly stub with cleanup
+ slow_call_path = 3, // something went wrong; call the VM
+ zero_path = 4, // bypass when length of copy is zero
+ bcopy_path = 5, // copy primitive array by 64-bit blocks
+ PATH_LIMIT = 6
+ };
+ RegionNode* result_region = new(C, PATH_LIMIT) RegionNode(PATH_LIMIT);
+ PhiNode* result_i_o = new(C, PATH_LIMIT) PhiNode(result_region, Type::ABIO);
+ PhiNode* result_memory = new(C, PATH_LIMIT) PhiNode(result_region, Type::MEMORY, adr_type);
+ record_for_igvn(result_region);
+ _gvn.set_type_bottom(result_i_o);
+ _gvn.set_type_bottom(result_memory);
+ assert(adr_type != TypePtr::BOTTOM, "must be RawMem or a T[] slice");
+
+ // The slow_control path:
+ Node* slow_control;
+ Node* slow_i_o = i_o();
+ Node* slow_mem = memory(adr_type);
+ debug_only(slow_control = (Node*) badAddress);
+
+ // Checked control path:
+ Node* checked_control = top();
+ Node* checked_mem = NULL;
+ Node* checked_i_o = NULL;
+ Node* checked_value = NULL;
+
+ if (basic_elem_type == T_CONFLICT) {
+ assert(!must_clear_dest, "");
+ Node* cv = generate_generic_arraycopy(adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length, nargs);
+ if (cv == NULL) cv = intcon(-1); // failure (no stub available)
+ checked_control = control();
+ checked_i_o = i_o();
+ checked_mem = memory(adr_type);
+ checked_value = cv;
+ set_control(top()); // no fast path
+ }
+
+ Node* not_pos = generate_nonpositive_guard(copy_length, length_never_negative);
+ if (not_pos != NULL) {
+ PreserveJVMState pjvms(this);
+ set_control(not_pos);
+
+ // (6) length must not be negative.
+ if (!length_never_negative) {
+ generate_negative_guard(copy_length, slow_region);
+ }
+
+ if (!stopped() && must_clear_dest) {
+ Node* dest_length = alloc->in(AllocateNode::ALength);
+ if (_gvn.eqv_uncast(copy_length, dest_length)
+ || _gvn.find_int_con(dest_length, 1) <= 0) {
+ // There is no zeroing to do.
+ } else {
+ // Clear the whole thing since there are no source elements to copy.
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), NULL,
+ alloc->in(AllocateNode::AllocSize));
+ }
+ }
+
+ // Present the results of the fast call.
+ result_region->init_req(zero_path, control());
+ result_i_o ->init_req(zero_path, i_o());
+ result_memory->init_req(zero_path, memory(adr_type));
+ }
+
+ if (!stopped() && must_clear_dest) {
+ // We have to initialize the *uncopied* part of the array to zero.
+ // The copy destination is the slice dest[off..off+len]. The other slices
+ // are dest_head = dest[0..off] and dest_tail = dest[off+len..dest.length].
+ Node* dest_size = alloc->in(AllocateNode::AllocSize);
+ Node* dest_length = alloc->in(AllocateNode::ALength);
+ Node* dest_tail = _gvn.transform( new(C,3) AddINode(dest_offset,
+ copy_length) );
+
+ // If there is a head section that needs zeroing, do it now.
+ if (find_int_con(dest_offset, -1) != 0) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), dest_offset,
+ NULL);
+ }
+
+ // Next, perform a dynamic check on the tail length.
+ // It is often zero, and we can win big if we prove this.
+ // There are two wins: Avoid generating the ClearArray
+ // with its attendant messy index arithmetic, and upgrade
+ // the copy to a more hardware-friendly word size of 64 bits.
+ Node* tail_ctl = NULL;
+ if (!stopped() && !_gvn.eqv_uncast(dest_tail, dest_length)) {
+ Node* cmp_lt = _gvn.transform( new(C,3) CmpINode(dest_tail, dest_length) );
+ Node* bol_lt = _gvn.transform( new(C,2) BoolNode(cmp_lt, BoolTest::lt) );
+ tail_ctl = generate_slow_guard(bol_lt, NULL);
+ assert(tail_ctl != NULL || !stopped(), "must be an outcome");
+ }
+
+ // At this point, let's assume there is no tail.
+ if (!stopped() && alloc != NULL && basic_elem_type != T_OBJECT) {
+ // There is no tail. Try an upgrade to a 64-bit copy.
+ bool didit = false;
+ { PreserveJVMState pjvms(this);
+ didit = generate_block_arraycopy(adr_type, basic_elem_type, alloc,
+ src, src_offset, dest, dest_offset,
+ dest_size);
+ if (didit) {
+ // Present the results of the block-copying fast call.
+ result_region->init_req(bcopy_path, control());
+ result_i_o ->init_req(bcopy_path, i_o());
+ result_memory->init_req(bcopy_path, memory(adr_type));
+ }
+ }
+ if (didit)
+ set_control(top()); // no regular fast path
+ }
+
+ // Clear the tail, if any.
+ if (tail_ctl != NULL) {
+ Node* notail_ctl = stopped() ? NULL : control();
+ set_control(tail_ctl);
+ if (notail_ctl == NULL) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ dest_tail, NULL,
+ dest_size);
+ } else {
+ // Make a local merge.
+ Node* done_ctl = new(C,3) RegionNode(3);
+ Node* done_mem = new(C,3) PhiNode(done_ctl, Type::MEMORY, adr_type);
+ done_ctl->init_req(1, notail_ctl);
+ done_mem->init_req(1, memory(adr_type));
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ dest_tail, NULL,
+ dest_size);
+ done_ctl->init_req(2, control());
+ done_mem->init_req(2, memory(adr_type));
+ set_control( _gvn.transform(done_ctl) );
+ set_memory( _gvn.transform(done_mem), adr_type );
+ }
+ }
+ }
+
+ BasicType copy_type = basic_elem_type;
+ assert(basic_elem_type != T_ARRAY, "caller must fix this");
+ if (!stopped() && copy_type == T_OBJECT) {
+ // If src and dest have compatible element types, we can copy bits.
+ // Types S[] and D[] are compatible if D is a supertype of S.
+ //
+ // If they are not, we will use checked_oop_disjoint_arraycopy,
+ // which performs a fast optimistic per-oop check, and backs off
+ // further to JVM_ArrayCopy on the first per-oop check that fails.
+ // (Actually, we don't move raw bits only; the GC requires card marks.)
+
+ // Get the klassOop for both src and dest
+ Node* src_klass = load_object_klass(src);
+ Node* dest_klass = load_object_klass(dest);
+
+ // Generate the subtype check.
+ // This might fold up statically, or then again it might not.
+ //
+ // Non-static example: Copying List<String>.elements to a new String[].
+ // The backing store for a List<String> is always an Object[],
+ // but its elements are always type String, if the generic types
+ // are correct at the source level.
+ //
+ // Test S[] against D[], not S against D, because (probably)
+ // the secondary supertype cache is less busy for S[] than S.
+ // This usually only matters when D is an interface.
+ Node* not_subtype_ctrl = gen_subtype_check(src_klass, dest_klass);
+ // Plug failing path into checked_oop_disjoint_arraycopy
+ if (not_subtype_ctrl != top()) {
+ PreserveJVMState pjvms(this);
+ set_control(not_subtype_ctrl);
+ // (At this point we can assume disjoint_bases, since types differ.)
+ int ek_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ Node* p1 = basic_plus_adr(dest_klass, ek_offset);
+ Node* n1 = new (C, 3) LoadKlassNode(0, immutable_memory(), p1, TypeRawPtr::BOTTOM);
+ Node* dest_elem_klass = _gvn.transform(n1);
+ Node* cv = generate_checkcast_arraycopy(adr_type,
+ dest_elem_klass,
+ src, src_offset, dest, dest_offset,
+ copy_length,
+ nargs);
+ if (cv == NULL) cv = intcon(-1); // failure (no stub available)
+ checked_control = control();
+ checked_i_o = i_o();
+ checked_mem = memory(adr_type);
+ checked_value = cv;
+ }
+ // At this point we know we do not need type checks on oop stores.
+
+ // Let's see if we need card marks:
+ if (alloc != NULL && use_ReduceInitialCardMarks()) {
+ // If we do not need card marks, copy using the jint or jlong stub.
+ copy_type = LP64_ONLY(T_LONG) NOT_LP64(T_INT);
+ assert(type2aelembytes[basic_elem_type] == type2aelembytes[copy_type],
+ "sizes agree");
+ }
+ }
+
+ if (!stopped()) {
+ // Generate the fast path, if possible.
+ PreserveJVMState pjvms(this);
+ generate_unchecked_arraycopy(adr_type, copy_type, disjoint_bases,
+ src, src_offset, dest, dest_offset,
+ ConvI2X(copy_length));
+
+ // Present the results of the fast call.
+ result_region->init_req(fast_path, control());
+ result_i_o ->init_req(fast_path, i_o());
+ result_memory->init_req(fast_path, memory(adr_type));
+ }
+
+ // Here are all the slow paths up to this point, in one bundle:
+ slow_control = top();
+ if (slow_region != NULL)
+ slow_control = _gvn.transform(slow_region);
+ debug_only(slow_region = (RegionNode*)badAddress);
+
+ set_control(checked_control);
+ if (!stopped()) {
+ // Clean up after the checked call.
+ // The returned value is either 0 or -1^K,
+ // where K = number of partially transferred array elements.
+ Node* cmp = _gvn.transform( new(C, 3) CmpINode(checked_value, intcon(0)) );
+ Node* bol = _gvn.transform( new(C, 2) BoolNode(cmp, BoolTest::eq) );
+ IfNode* iff = create_and_map_if(control(), bol, PROB_MAX, COUNT_UNKNOWN);
+
+ // If it is 0, we are done, so transfer to the end.
+ Node* checks_done = _gvn.transform( new(C, 1) IfTrueNode(iff) );
+ result_region->init_req(checked_path, checks_done);
+ result_i_o ->init_req(checked_path, checked_i_o);
+ result_memory->init_req(checked_path, checked_mem);
+
+ // If it is not zero, merge into the slow call.
+ set_control( _gvn.transform( new(C, 1) IfFalseNode(iff) ));
+ RegionNode* slow_reg2 = new(C, 3) RegionNode(3);
+ PhiNode* slow_i_o2 = new(C, 3) PhiNode(slow_reg2, Type::ABIO);
+ PhiNode* slow_mem2 = new(C, 3) PhiNode(slow_reg2, Type::MEMORY, adr_type);
+ record_for_igvn(slow_reg2);
+ slow_reg2 ->init_req(1, slow_control);
+ slow_i_o2 ->init_req(1, slow_i_o);
+ slow_mem2 ->init_req(1, slow_mem);
+ slow_reg2 ->init_req(2, control());
+ slow_i_o2 ->init_req(2, i_o());
+ slow_mem2 ->init_req(2, memory(adr_type));
+
+ slow_control = _gvn.transform(slow_reg2);
+ slow_i_o = _gvn.transform(slow_i_o2);
+ slow_mem = _gvn.transform(slow_mem2);
+
+ if (alloc != NULL) {
+ // We'll restart from the very beginning, after zeroing the whole thing.
+ // This can cause double writes, but that's OK since dest is brand new.
+ // So we ignore the low 31 bits of the value returned from the stub.
+ } else {
+ // We must continue the copy exactly where it failed, or else
+ // another thread might see the wrong number of writes to dest.
+ Node* checked_offset = _gvn.transform( new(C, 3) XorINode(checked_value, intcon(-1)) );
+ Node* slow_offset = new(C, 3) PhiNode(slow_reg2, TypeInt::INT);
+ slow_offset->init_req(1, intcon(0));
+ slow_offset->init_req(2, checked_offset);
+ slow_offset = _gvn.transform(slow_offset);
+
+ // Adjust the arguments by the conditionally incoming offset.
+ Node* src_off_plus = _gvn.transform( new(C, 3) AddINode(src_offset, slow_offset) );
+ Node* dest_off_plus = _gvn.transform( new(C, 3) AddINode(dest_offset, slow_offset) );
+ Node* length_minus = _gvn.transform( new(C, 3) SubINode(copy_length, slow_offset) );
+
+ // Tweak the node variables to adjust the code produced below:
+ src_offset = src_off_plus;
+ dest_offset = dest_off_plus;
+ copy_length = length_minus;
+ }
+ }
+
+ set_control(slow_control);
+ if (!stopped()) {
+ // Generate the slow path, if needed.
+ PreserveJVMState pjvms(this); // replace_in_map may trash the map
+
+ set_memory(slow_mem, adr_type);
+ set_i_o(slow_i_o);
+
+ if (must_clear_dest) {
+ generate_clear_array(adr_type, dest, basic_elem_type,
+ intcon(0), NULL,
+ alloc->in(AllocateNode::AllocSize));
+ }
+
+ if (dest != original_dest) {
+ // Promote from rawptr to oop, so it looks right in the call's GC map.
+ dest = _gvn.transform( new(C,2) CheckCastPPNode(control(), dest,
+ TypeInstPtr::NOTNULL) );
+
+ // Edit the call's debug-info to avoid referring to original_dest.
+ // (The problem with original_dest is that it isn't ready until
+ // after the InitializeNode completes, but this stuff is before.)
+ // Substitute in the locally valid dest_oop.
+ replace_in_map(original_dest, dest);
+ }
+
+ generate_slow_arraycopy(adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length, nargs);
+
+ result_region->init_req(slow_call_path, control());
+ result_i_o ->init_req(slow_call_path, i_o());
+ result_memory->init_req(slow_call_path, memory(adr_type));
+ }
+
+ // Remove unused edges.
+ for (uint i = 1; i < result_region->req(); i++) {
+ if (result_region->in(i) == NULL)
+ result_region->init_req(i, top());
+ }
+
+ // Finished; return the combined state.
+ set_control( _gvn.transform(result_region) );
+ set_i_o( _gvn.transform(result_i_o) );
+ set_memory( _gvn.transform(result_memory), adr_type );
+
+ if (dest != original_dest) {
+ // Pin the "finished" array node after the arraycopy/zeroing operations.
+ // Use a secondary InitializeNode memory barrier.
+ InitializeNode* init = insert_mem_bar_volatile(Op_Initialize,
+ Compile::AliasIdxRaw,
+ raw_dest)->as_Initialize();
+ init->set_complete(&_gvn); // (there is no corresponding AllocateNode)
+ _gvn.hash_delete(original_dest);
+ original_dest->set_req(0, control());
+ _gvn.hash_find_insert(original_dest); // put back into GVN table
+ }
+
+ // The memory edges above are precise in order to model effects around
+// array copies accurately to allow value numbering of field loads around
+ // arraycopy. Such field loads, both before and after, are common in Java
+ // collections and similar classes involving header/array data structures.
+ //
+// But with a low number of registers, or when some registers are used or
+// killed by arraycopy calls, this causes register spilling on the stack.
+// See 6544710. The next memory barrier is added to avoid that. If the
+// arraycopy can be optimized away (which it sometimes can), then the membar
+// can be removed manually as well.
+ if (InsertMemBarAfterArraycopy)
+ insert_mem_bar(Op_MemBarCPUOrder);
+}
+
+
+// Helper function which determines if an arraycopy immediately follows
+// an allocation, with no intervening tests or other escapes for the object.
+AllocateArrayNode*
+LibraryCallKit::tightly_coupled_allocation(Node* ptr,
+ RegionNode* slow_region) {
+ if (stopped()) return NULL; // no fast path
+ if (C->AliasLevel() == 0) return NULL; // no MergeMems around
+
+ AllocateArrayNode* alloc = AllocateArrayNode::Ideal_array_allocation(ptr, &_gvn);
+ if (alloc == NULL) return NULL;
+
+ Node* rawmem = memory(Compile::AliasIdxRaw);
+ // Is the allocation's memory state untouched?
+ if (!(rawmem->is_Proj() && rawmem->in(0)->is_Initialize())) {
+ // Bail out if there have been raw-memory effects since the allocation.
+ // (Example: There might have been a call or safepoint.)
+ return NULL;
+ }
+ rawmem = rawmem->in(0)->as_Initialize()->memory(Compile::AliasIdxRaw);
+ if (!(rawmem->is_Proj() && rawmem->in(0) == alloc)) {
+ return NULL;
+ }
+
+ // There must be no unexpected observers of this allocation.
+ for (DUIterator_Fast imax, i = ptr->fast_outs(imax); i < imax; i++) {
+ Node* obs = ptr->fast_out(i);
+ if (obs != this->map()) {
+ return NULL;
+ }
+ }
+
+ // This arraycopy must unconditionally follow the allocation of the ptr.
+ Node* alloc_ctl = ptr->in(0);
+ assert(just_allocated_object(alloc_ctl) == ptr, "most recent allo");
+
+ Node* ctl = control();
+ while (ctl != alloc_ctl) {
+ // There may be guards which feed into the slow_region.
+ // Any other control flow means that we might not get a chance
+ // to finish initializing the allocated object.
+ if ((ctl->is_IfFalse() || ctl->is_IfTrue()) && ctl->in(0)->is_If()) {
+ IfNode* iff = ctl->in(0)->as_If();
+ Node* not_ctl = iff->proj_out(1 - ctl->as_Proj()->_con);
+ assert(not_ctl != NULL && not_ctl != ctl, "found alternate");
+ if (slow_region != NULL && slow_region->find_edge(not_ctl) >= 1) {
+ ctl = iff->in(0); // This test feeds the known slow_region.
+ continue;
+ }
+ // One more try: Various low-level checks bottom out in
+ // uncommon traps. If the debug-info of the trap omits
+ // any reference to the allocation, as we've already
+ // observed, then there can be no objection to the trap.
+ bool found_trap = false;
+ for (DUIterator_Fast jmax, j = not_ctl->fast_outs(jmax); j < jmax; j++) {
+ Node* obs = not_ctl->fast_out(j);
+ if (obs->in(0) == not_ctl && obs->is_Call() &&
+ (obs->as_Call()->entry_point() ==
+ SharedRuntime::uncommon_trap_blob()->instructions_begin())) {
+ found_trap = true; break;
+ }
+ }
+ if (found_trap) {
+ ctl = iff->in(0); // This test feeds a harmless uncommon trap.
+ continue;
+ }
+ }
+ return NULL;
+ }
+
+ // If we get this far, we have an allocation which immediately
+ // precedes the arraycopy, and we can take over zeroing the new object.
+ // The arraycopy will finish the initialization, and provide
+ // a new control state to which we will anchor the destination pointer.
+
+ return alloc;
+}
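+
+// For illustration, the pattern recognized above is a copy whose destination
+// is a freshly allocated array (e.g. the allocate-then-copy shape behind
+// Arrays.copyOf), where the only control flow between the allocation and the
+// copy is guards feeding the given slow_region or harmless uncommon traps,
+// and nothing else has observed the new object or touched raw memory.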
+
+// Helper for initialization of arrays, creating a ClearArray.
+// It writes zero bits in [start..end), within the body of an array object.
+// The memory effects are all chained onto the 'adr_type' alias category.
+//
+// Since the object is otherwise uninitialized, we are free
+// to put a little "slop" around the edges of the cleared area,
+// as long as it does not go back into the array's header,
+// or beyond the array end within the heap.
+//
+// The lower edge can be rounded down to the nearest jint and the
+// upper edge can be rounded up to the nearest MinObjAlignmentInBytes.
+//
+// Arguments:
+// adr_type memory slice where writes are generated
+// dest oop of the destination array
+// basic_elem_type element type of the destination
+// slice_idx array index of first element to store
+// slice_len number of elements to store (or NULL)
+// dest_size total size in bytes of the array object
+//
+// Exactly one of slice_len or dest_size must be non-NULL.
+// If dest_size is non-NULL, zeroing extends to the end of the object.
+// If slice_len is non-NULL, the slice_idx value must be a constant.
+void
+LibraryCallKit::generate_clear_array(const TypePtr* adr_type,
+ Node* dest,
+ BasicType basic_elem_type,
+ Node* slice_idx,
+ Node* slice_len,
+ Node* dest_size) {
+ // one or the other but not both of slice_len and dest_size:
+ assert((slice_len != NULL? 1: 0) + (dest_size != NULL? 1: 0) == 1, "");
+ if (slice_len == NULL) slice_len = top();
+ if (dest_size == NULL) dest_size = top();
+
+ // operate on this memory slice:
+ Node* mem = memory(adr_type); // memory slice to operate on
+
+ // scaling and rounding of indexes:
+ int scale = exact_log2(type2aelembytes[basic_elem_type]);
+ int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
+ int clear_low = (-1 << scale) & (BytesPerInt - 1);
+ int bump_bit = (-1 << scale) & BytesPerInt;
+
+ // determine constant starts and ends
+ const intptr_t BIG_NEG = -128;
+ assert(BIG_NEG + 2*abase < 0, "neg enough");
+ intptr_t slice_idx_con = (intptr_t) find_int_con(slice_idx, BIG_NEG);
+ intptr_t slice_len_con = (intptr_t) find_int_con(slice_len, BIG_NEG);
+ if (slice_len_con == 0) {
+ return; // nothing to do here
+ }
+ intptr_t start_con = (abase + (slice_idx_con << scale)) & ~clear_low;
+ intptr_t end_con = find_intptr_t_con(dest_size, -1);
+ if (slice_idx_con >= 0 && slice_len_con >= 0) {
+ assert(end_con < 0, "not two cons");
+ end_con = round_to(abase + ((slice_idx_con + slice_len_con) << scale),
+ BytesPerLong);
+ }
+
+ if (start_con >= 0 && end_con >= 0) {
+ // Constant start and end. Simple.
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end_con, &_gvn);
+ } else if (start_con >= 0 && dest_size != top()) {
+ // Constant start, pre-rounded end after the tail of the array.
+ Node* end = dest_size;
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end, &_gvn);
+ } else if (start_con >= 0 && slice_len != top()) {
+ // Constant start, non-constant end. End needs rounding up.
+ // End offset = round_up(abase + ((slice_idx_con + slice_len) << scale), 8)
+ intptr_t end_base = abase + (slice_idx_con << scale);
+ int end_round = (-1 << scale) & (BytesPerLong - 1);
+ Node* end = ConvI2X(slice_len);
+ if (scale != 0)
+ end = _gvn.transform( new(C,3) LShiftXNode(end, intcon(scale) ));
+ end_base += end_round;
+ end = _gvn.transform( new(C,3) AddXNode(end, MakeConX(end_base)) );
+ end = _gvn.transform( new(C,3) AndXNode(end, MakeConX(~end_round)) );
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start_con, end, &_gvn);
+ } else if (start_con < 0 && dest_size != top()) {
+ // Non-constant start, pre-rounded end after the tail of the array.
+ // This is almost certainly a "round-to-end" operation.
+ Node* start = slice_idx;
+ start = ConvI2X(start);
+ if (scale != 0)
+ start = _gvn.transform( new(C,3) LShiftXNode( start, intcon(scale) ));
+ start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(abase)) );
+ if ((bump_bit | clear_low) != 0) {
+ int to_clear = (bump_bit | clear_low);
+ // Align up mod 8, then store a jint zero unconditionally
+ // just before the mod-8 boundary.
+ // This would only fail if the first array element were immediately
+ // after the length field, and were also at an even offset mod 8.
+ assert(((abase + bump_bit) & ~to_clear) - BytesPerInt
+ >= arrayOopDesc::length_offset_in_bytes() + BytesPerInt,
+ "store must not trash length field");
+
+ // Bump 'start' up to (or past) the next jint boundary:
+ start = _gvn.transform( new(C,3) AddXNode(start, MakeConX(bump_bit)) );
+ // Round bumped 'start' down to jlong boundary in body of array.
+ start = _gvn.transform( new(C,3) AndXNode(start, MakeConX(~to_clear)) );
+ // Store a zero to the immediately preceding jint:
+ Node* x1 = _gvn.transform( new(C,3) AddXNode(start, MakeConX(-BytesPerInt)) );
+ Node* p1 = basic_plus_adr(dest, x1);
+ mem = StoreNode::make(C, control(), mem, p1, adr_type, intcon(0), T_INT);
+ mem = _gvn.transform(mem);
+ }
+
+ Node* end = dest_size; // pre-rounded
+ mem = ClearArrayNode::clear_memory(control(), mem, dest,
+ start, end, &_gvn);
+ } else {
+ // Non-constant start, unrounded non-constant end.
+ // (Nobody zeroes a random midsection of an array using this routine.)
+ ShouldNotReachHere(); // fix caller
+ }
+
+ // Done.
+ set_memory(mem, adr_type);
+}
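+
+// For illustration, the rounding masks above
+// (clear_low = (-1 << scale) & (BytesPerInt-1),
+//  bump_bit  = (-1 << scale) & BytesPerInt) work out as:
+//   T_BYTE  (scale 0): clear_low = 3, bump_bit = 4
+//   T_SHORT (scale 1): clear_low = 2, bump_bit = 4
+//   T_INT   (scale 2): clear_low = 0, bump_bit = 4
+//   T_LONG  (scale 3): clear_low = 0, bump_bit = 0
+// so sub-int element starts are rounded down to a jint boundary, and any type
+// smaller than a jlong may need the extra jint store before the start is
+// rounded up to a jlong boundary.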
+
+
+bool
+LibraryCallKit::generate_block_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ AllocateNode* alloc,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* dest_size) {
+ // See if there is an advantage from block transfer.
+ int scale = exact_log2(type2aelembytes[basic_elem_type]);
+ if (scale >= LogBytesPerLong)
+ return false; // it is already a block transfer
+
+ // Look at the alignment of the starting offsets.
+ int abase = arrayOopDesc::base_offset_in_bytes(basic_elem_type);
+ const intptr_t BIG_NEG = -128;
+ assert(BIG_NEG + 2*abase < 0, "neg enough");
+
+ intptr_t src_off = abase + ((intptr_t) find_int_con(src_offset, -1) << scale);
+ intptr_t dest_off = abase + ((intptr_t) find_int_con(dest_offset, -1) << scale);
+ if (src_off < 0 || dest_off < 0)
+ // At present, we can only understand constants.
+ return false;
+
+ if (((src_off | dest_off) & (BytesPerLong-1)) != 0) {
+ // Non-aligned; too bad.
+ // One more chance: Pick off an initial 32-bit word.
+ // This is a common case, since abase can be odd mod 8.
+ if (((src_off | dest_off) & (BytesPerLong-1)) == BytesPerInt &&
+ ((src_off ^ dest_off) & (BytesPerLong-1)) == 0) {
+ Node* sptr = basic_plus_adr(src, src_off);
+ Node* dptr = basic_plus_adr(dest, dest_off);
+ Node* sval = make_load(control(), sptr, TypeInt::INT, T_INT, adr_type);
+ store_to_memory(control(), dptr, sval, T_INT, adr_type);
+ src_off += BytesPerInt;
+ dest_off += BytesPerInt;
+ } else {
+ return false;
+ }
+ }
+ assert(src_off % BytesPerLong == 0, "");
+ assert(dest_off % BytesPerLong == 0, "");
+
+ // Do this copy by giant steps.
+ Node* sptr = basic_plus_adr(src, src_off);
+ Node* dptr = basic_plus_adr(dest, dest_off);
+ Node* countx = dest_size;
+ countx = _gvn.transform( new (C, 3) SubXNode(countx, MakeConX(dest_off)) );
+ countx = _gvn.transform( new (C, 3) URShiftXNode(countx, intcon(LogBytesPerLong)) );
+
+ bool disjoint_bases = true; // since alloc != NULL
+ generate_unchecked_arraycopy(adr_type, T_LONG, disjoint_bases,
+ sptr, NULL, dptr, NULL, countx);
+
+ return true;
+}
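+
+// For illustration: when src_off and dest_off are both congruent to
+// BytesPerInt mod BytesPerLong (a common case, since abase need not be
+// 8-byte aligned), the routine above first copies a single jint, which
+// advances both offsets onto a jlong boundary, and then moves the remainder
+// in 64-bit steps through the T_LONG arraycopy stub.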
+
+
+// Helper function; generates code for the slow case.
+// We make a call to a runtime method which emulates the native method,
+// but without the native wrapper overhead.
+void
+LibraryCallKit::generate_slow_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ _sp += nargs; // any deopt will start just before call to enclosing method
+ Node* call = make_runtime_call(RC_NO_LEAF | RC_UNCOMMON,
+ OptoRuntime::slow_arraycopy_Type(),
+ OptoRuntime::slow_arraycopy_Java(),
+ "slow_arraycopy", adr_type,
+ src, src_offset, dest, dest_offset,
+ copy_length);
+ _sp -= nargs;
+
+ // Handle exceptions thrown by this fellow:
+ make_slow_call_ex(call, env()->Throwable_klass(), false);
+}
+
+// Helper function; generates code for cases requiring runtime checks.
+Node*
+LibraryCallKit::generate_checkcast_arraycopy(const TypePtr* adr_type,
+ Node* dest_elem_klass,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ if (stopped()) return NULL;
+
+ address copyfunc_addr = StubRoutines::checkcast_arraycopy();
+ if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
+ return NULL;
+ }
+
+ // Pick out the parameters required to perform a store-check
+ // for the target array. This is an optimistic check. It will
+ // look in each non-null element's class, at the desired klass's
+ // super_check_offset, for the desired klass.
+ int sco_offset = Klass::super_check_offset_offset_in_bytes() + sizeof(oopDesc);
+ Node* p3 = basic_plus_adr(dest_elem_klass, sco_offset);
+ Node* n3 = new(C, 3) LoadINode(NULL, immutable_memory(), p3, TypeRawPtr::BOTTOM);
+ Node* check_offset = _gvn.transform(n3);
+ Node* check_value = dest_elem_klass;
+
+ Node* src_start = array_element_address(src, src_offset, T_OBJECT);
+ Node* dest_start = array_element_address(dest, dest_offset, T_OBJECT);
+
+ // (We know the arrays are never conjoint, because their types differ.)
+ Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::checkcast_arraycopy_Type(),
+ copyfunc_addr, "checkcast_arraycopy", adr_type,
+ // five arguments, of which two are
+ // intptr_t (jlong in LP64)
+ src_start, dest_start,
+ copy_length XTOP,
+ check_offset XTOP,
+ check_value);
+
+ return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+}
+
+
+// Helper function; generates code for cases requiring runtime checks.
+Node*
+LibraryCallKit::generate_generic_arraycopy(const TypePtr* adr_type,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length,
+ int nargs) {
+ if (stopped()) return NULL;
+
+ address copyfunc_addr = StubRoutines::generic_arraycopy();
+ if (copyfunc_addr == NULL) { // Stub was not generated, go slow path.
+ return NULL;
+ }
+
+ Node* call = make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::generic_arraycopy_Type(),
+ copyfunc_addr, "generic_arraycopy", adr_type,
+ src, src_offset, dest, dest_offset, copy_length);
+
+ return _gvn.transform(new (C, 1) ProjNode(call, TypeFunc::Parms));
+}
+
+// Helper function; generates the fast out-of-line call to an arraycopy stub.
+void
+LibraryCallKit::generate_unchecked_arraycopy(const TypePtr* adr_type,
+ BasicType basic_elem_type,
+ bool disjoint_bases,
+ Node* src, Node* src_offset,
+ Node* dest, Node* dest_offset,
+ Node* copy_length) {
+ if (stopped()) return; // nothing to do
+
+ Node* src_start = src;
+ Node* dest_start = dest;
+ if (src_offset != NULL || dest_offset != NULL) {
+ assert(src_offset != NULL && dest_offset != NULL, "");
+ src_start = array_element_address(src, src_offset, basic_elem_type);
+ dest_start = array_element_address(dest, dest_offset, basic_elem_type);
+ }
+
+ // Figure out which arraycopy runtime method to call.
+ const char* copyfunc_name = "arraycopy";
+ address copyfunc_addr =
+ basictype2arraycopy(basic_elem_type, src_offset, dest_offset,
+ disjoint_bases, copyfunc_name);
+
+ // Call it. Note that the copy_length value is not scaled to a byte size.
+ make_runtime_call(RC_LEAF|RC_NO_FP,
+ OptoRuntime::fast_arraycopy_Type(),
+ copyfunc_addr, copyfunc_name, adr_type,
+ src_start, dest_start, copy_length XTOP);
+}
diff --git a/src/share/vm/opto/live.cpp b/src/share/vm/opto/live.cpp
new file mode 100644
index 000000000..4127f67e1
--- /dev/null
+++ b/src/share/vm/opto/live.cpp
@@ -0,0 +1,314 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_live.cpp.incl"
+
+
+
+//=============================================================================
+//------------------------------PhaseLive--------------------------------------
+// Compute live-in/live-out. We use a totally incremental algorithm. The LIVE
+// problem is monotonic. The steady-state solution looks like this: pull a
+// block from the worklist. It has a set of deltas - values which are newly
+// live-in for the block. Push these into the live-out sets of all predecessor
+// blocks. At each predecessor, only the values not already live-out are kept
+// (the live-out sets simply grow); the truly new live-out values then have
+// the predecessor's local definitions removed. Leftover bits become the new
+// live-in for the predecessor block, and the pred block is put on the
+// worklist.
+//   The locally computed live-in sets are built once and pushed into
+// predecessor live-out sets; this separate computation is done in the outer
+// loop below.
+PhaseLive::PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena ) : Phase(LIVE), _cfg(cfg), _names(names), _arena(arena), _live(0) {
+}
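+
+// Worked example (a small sketch of the propagation in compute() below):
+// suppose block B1 precedes B2, B2 uses a value v that it does not define,
+// and B1 defines v. Processing B2 computes live-in {v} and pushes it to B1
+// via add_liveout: v is inserted into B1's live-out set, but because B1
+// defines v locally no delta is created, so B1 is not re-queued on the
+// worklist.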
+
+void PhaseLive::compute(uint maxlrg) {
+ _maxlrg = maxlrg;
+ _worklist = new (_arena) Block_List();
+
+ // Init the sparse live arrays. This data is live on exit from here!
+ // The _live info is the live-out info.
+ _live = (IndexSet*)_arena->Amalloc(sizeof(IndexSet)*_cfg._num_blocks);
+ uint i;
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _live[i].initialize(_maxlrg);
+ }
+
+ // Init the sparse arrays for delta-sets.
+ ResourceMark rm; // Nuke temp storage on exit
+
+ // Does the memory used by _defs and _deltas get reclaimed? Does it matter? TT
+
+ // Array of values defined locally in blocks
+ _defs = NEW_RESOURCE_ARRAY(IndexSet,_cfg._num_blocks);
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _defs[i].initialize(_maxlrg);
+ }
+
+ // Array of delta-set pointers, indexed by block pre_order-1.
+ _deltas = NEW_RESOURCE_ARRAY(IndexSet*,_cfg._num_blocks);
+ memset( _deltas, 0, sizeof(IndexSet*)* _cfg._num_blocks);
+
+ _free_IndexSet = NULL;
+
+ // Blocks having done pass-1
+ VectorSet first_pass(Thread::current()->resource_area());
+
+ // Outer loop: must compute local live-in sets and push into predecessors.
+ uint iters = _cfg._num_blocks; // stat counters
+ for( uint j=_cfg._num_blocks; j>0; j-- ) {
+ Block *b = _cfg._blocks[j-1];
+
+ // Compute the local live-in set. Start with any new live-out bits.
+ IndexSet *use = getset( b );
+ IndexSet *def = &_defs[b->_pre_order-1];
+ DEBUG_ONLY(IndexSet *def_outside = getfreeset();)
+ uint i;
+ for( i=b->_nodes.size(); i>1; i-- ) {
+ Node *n = b->_nodes[i-1];
+ if( n->is_Phi() ) break;
+
+ uint r = _names[n->_idx];
+ assert(!def_outside->member(r), "Use of external LRG overlaps the same LRG defined in this block");
+ def->insert( r );
+ use->remove( r );
+ uint cnt = n->req();
+ for( uint k=1; k<cnt; k++ ) {
+ Node *nk = n->in(k);
+ uint nkidx = nk->_idx;
+ if( _cfg._bbs[nkidx] != b ) {
+ uint u = _names[nkidx];
+ use->insert( u );
+ DEBUG_ONLY(def_outside->insert( u );)
+ }
+ }
+ }
+#ifdef ASSERT
+ def_outside->set_next(_free_IndexSet);
+ _free_IndexSet = def_outside; // Drop onto free list
+#endif
+ // Remove anything defined by Phis and the block start instruction
+ for( uint k=i; k>0; k-- ) {
+ uint r = _names[b->_nodes[k-1]->_idx];
+ def->insert( r );
+ use->remove( r );
+ }
+
+ // Push these live-in things to predecessors
+ for( uint l=1; l<b->num_preds(); l++ ) {
+ Block *p = _cfg._bbs[b->pred(l)->_idx];
+ add_liveout( p, use, first_pass );
+
+ // PhiNode uses go in the live-out set of prior blocks.
+ for( uint k=i; k>0; k-- )
+ add_liveout( p, _names[b->_nodes[k-1]->in(l)->_idx], first_pass );
+ }
+ freeset( b );
+ first_pass.set(b->_pre_order);
+
+ // Inner loop: blocks that picked up new live-out values to be propagated
+ while( _worklist->size() ) {
+ iters++; // stat counter
+ Block *b = _worklist->pop();
+ IndexSet *delta = getset(b);
+ assert( delta->count(), "missing delta set" );
+
+ // Add new-live-in to predecessors live-out sets
+ for( uint l=1; l<b->num_preds(); l++ )
+ add_liveout( _cfg._bbs[b->pred(l)->_idx], delta, first_pass );
+
+ freeset(b);
+ } // End of while-worklist-not-empty
+
+ } // End of for-all-blocks-outer-loop
+
+ // We explicitly clear all of the IndexSets which we are about to release.
+ // This allows us to recycle their internal memory into IndexSet's free list.
+
+ for( i=0; i<_cfg._num_blocks; i++ ) {
+ _defs[i].clear();
+ if (_deltas[i]) {
+ // Is this always true?
+ _deltas[i]->clear();
+ }
+ }
+ IndexSet *free = _free_IndexSet;
+ while (free != NULL) {
+ IndexSet *temp = free;
+ free = free->next();
+ temp->clear();
+ }
+
+}
+
+//------------------------------stats------------------------------------------
+#ifndef PRODUCT
+void PhaseLive::stats(uint iters) const {
+}
+#endif
+
+//------------------------------getset-----------------------------------------
+// Get an IndexSet for a block. Return existing one, if any. Make a new
+// empty one if a prior one does not exist.
+IndexSet *PhaseLive::getset( Block *p ) {
+ IndexSet *delta = _deltas[p->_pre_order-1];
+ if( !delta ) // Not on worklist?
+ // Get a free set; flag as being on worklist
+ delta = _deltas[p->_pre_order-1] = getfreeset();
+ return delta; // Return set of new live-out items
+}
+
+//------------------------------getfreeset-------------------------------------
+// Pull from free list, or allocate. Internal allocation on the returned set
+// is always from thread local storage.
+IndexSet *PhaseLive::getfreeset( ) {
+ IndexSet *f = _free_IndexSet;
+ if( !f ) {
+ f = new IndexSet;
+// f->set_arena(Thread::current()->resource_area());
+ f->initialize(_maxlrg, Thread::current()->resource_area());
+ } else {
+ // Pull from free list
+ _free_IndexSet = f->next();
+ //f->_cnt = 0; // Reset to empty
+// f->set_arena(Thread::current()->resource_area());
+ f->initialize(_maxlrg, Thread::current()->resource_area());
+ }
+ return f;
+}
+
+//------------------------------freeset----------------------------------------
+// Free an IndexSet from a block.
+void PhaseLive::freeset( const Block *p ) {
+ IndexSet *f = _deltas[p->_pre_order-1];
+ f->set_next(_free_IndexSet);
+ _free_IndexSet = f; // Drop onto free list
+ _deltas[p->_pre_order-1] = NULL;
+}
+
+//------------------------------add_liveout------------------------------------
+// Add a live-out value to a given block's live-out set. If it is new, then
+// also add it to the delta set and stick the block on the worklist.
+void PhaseLive::add_liveout( Block *p, uint r, VectorSet &first_pass ) {
+ IndexSet *live = &_live[p->_pre_order-1];
+ if( live->insert(r) ) { // If actually inserted...
+ // We extended the live-out set. See if the value is generated locally.
+ // If it is not, then we must extend the live-in set.
+ if( !_defs[p->_pre_order-1].member( r ) ) {
+ if( !_deltas[p->_pre_order-1] && // Not on worklist?
+ first_pass.test(p->_pre_order) )
+ _worklist->push(p); // Actually go on worklist if already 1st pass
+ getset(p)->insert(r);
+ }
+ }
+}
+
+
+//------------------------------add_liveout------------------------------------
+// Add a vector of live-out values to a given block's live-out set.
+void PhaseLive::add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass ) {
+ IndexSet *live = &_live[p->_pre_order-1];
+ IndexSet *defs = &_defs[p->_pre_order-1];
+ IndexSet *on_worklist = _deltas[p->_pre_order-1];
+ IndexSet *delta = on_worklist ? on_worklist : getfreeset();
+
+ IndexSetIterator elements(lo);
+ uint r;
+ while ((r = elements.next()) != 0) {
+ if( live->insert(r) && // If actually inserted...
+ !defs->member( r ) ) // and not defined locally
+ delta->insert(r); // Then add to live-in set
+ }
+
+ if( delta->count() ) { // If actually added things
+ _deltas[p->_pre_order-1] = delta; // Flag as on worklist now
+ if( !on_worklist && // Not on worklist?
+ first_pass.test(p->_pre_order) )
+ _worklist->push(p); // Actually go on worklist if already 1st pass
+ } else { // Nothing there; just free it
+ delta->set_next(_free_IndexSet);
+ _free_IndexSet = delta; // Drop onto free list
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+// Dump the live-out set for a block
+void PhaseLive::dump( const Block *b ) const {
+ tty->print("Block %d: ",b->_pre_order);
+ tty->print("LiveOut: "); _live[b->_pre_order-1].dump();
+ uint cnt = b->_nodes.size();
+ for( uint i=0; i<cnt; i++ ) {
+ tty->print("L%d/", _names[b->_nodes[i]->_idx] );
+ b->_nodes[i]->dump();
+ }
+ tty->print("\n");
+}
+
+//------------------------------verify_base_ptrs-------------------------------
+// Verify that base pointers and derived pointers are still sane.
+// Basically, if a derived pointer is live at a safepoint, then its
+// base pointer must be live also.
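+// (For example, a derived pointer such as the address of a[i] is an AddP off
+// the array base 'a'; the GC needs the base live at the safepoint so it can
+// re-derive the interior pointer after the object moves. Illustration only.)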
+void PhaseChaitin::verify_base_ptrs( ResourceArea *a ) const {
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ Block *b = _cfg._blocks[i];
+ for( uint j = b->end_idx() + 1; j > 1; j-- ) {
+ Node *n = b->_nodes[j-1];
+ if( n->is_Phi() ) break;
+ // Found a safepoint?
+ if( n->is_MachSafePoint() ) {
+ MachSafePointNode *sfpt = n->as_MachSafePoint();
+ JVMState* jvms = sfpt->jvms();
+ if (jvms != NULL) {
+ // Now scan for a live derived pointer
+ if (jvms->oopoff() < sfpt->req()) {
+ // Check each derived/base pair
+ for (uint idx = jvms->oopoff(); idx < sfpt->req(); idx += 2) {
+ Node *check = sfpt->in(idx);
+ uint j = 0;
+ // search upwards through spills and spill phis for AddP
+ while(true) {
+ if( !check ) break;
+ int idx = check->is_Copy();
+ if( idx ) {
+ check = check->in(idx);
+ } else if( check->is_Phi() && check->_idx >= _oldphi ) {
+ check = check->in(1);
+ } else
+ break;
+ j++;
+ assert(j < 100000,"Derived pointer checking in infinite loop");
+ } // End while
+ assert(check->is_Mach() && check->as_Mach()->ideal_Opcode() == Op_AddP,"Bad derived pointer");
+ }
+ } // End of check for derived pointers
+ } // End of check for debug info
+ } // End of if found a safepoint
+ } // End of forall instructions in block
+ } // End of forall blocks
+}
+#endif
diff --git a/src/share/vm/opto/live.hpp b/src/share/vm/opto/live.hpp
new file mode 100644
index 000000000..886f28f57
--- /dev/null
+++ b/src/share/vm/opto/live.hpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Block;
+class LRG_List;
+class PhaseCFG;
+class VectorSet;
+class IndexSet;
+
+//------------------------------PhaseLive--------------------------------------
+// Compute live-in/live-out
+class PhaseLive : public Phase {
+ // Array of Sets of values live at the end (live-out) of a block.
+ // Indexed by block pre-order number.
+ IndexSet *_live;
+
+ // Array of Sets of values defined locally in the block
+ // Indexed by block pre-order number.
+ IndexSet *_defs;
+
+ // Array of delta-set pointers, indexed by block pre-order number
+ IndexSet **_deltas;
+ IndexSet *_free_IndexSet; // Free list of same
+
+ Block_List *_worklist; // Worklist for iterative solution
+
+ const PhaseCFG &_cfg; // Basic blocks
+ LRG_List &_names; // Mapping from Nodes to live ranges
+ uint _maxlrg; // Largest live-range number
+ Arena *_arena;
+
+ IndexSet *getset( Block *p );
+ IndexSet *getfreeset( );
+ void freeset( const Block *p );
+ void add_liveout( Block *p, uint r, VectorSet &first_pass );
+ void add_liveout( Block *p, IndexSet *lo, VectorSet &first_pass );
+
+public:
+ PhaseLive( const PhaseCFG &cfg, LRG_List &names, Arena *arena );
+ ~PhaseLive() {}
+ // Compute liveness info
+ void compute(uint maxlrg);
+ // Reset arena storage
+ void reset() { _live = NULL; }
+
+ // Return the live-out set for this block
+ IndexSet *live( const Block * b ) { return &_live[b->_pre_order-1]; }
+
+#ifndef PRODUCT
+ void dump( const Block *b ) const;
+ void stats(uint iters) const;
+#endif
+};
diff --git a/src/share/vm/opto/locknode.cpp b/src/share/vm/opto/locknode.cpp
new file mode 100644
index 000000000..90da8efaa
--- /dev/null
+++ b/src/share/vm/opto/locknode.cpp
@@ -0,0 +1,122 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_locknode.cpp.incl"
+
+//=============================================================================
+const RegMask &BoxLockNode::in_RegMask(uint i) const {
+ return _inmask;
+}
+
+const RegMask &BoxLockNode::out_RegMask() const {
+ return *Matcher::idealreg2regmask[Op_RegP];
+}
+
+uint BoxLockNode::size_of() const { return sizeof(*this); }
+
+BoxLockNode::BoxLockNode( int slot ) : Node( Compile::current()->root() ), _slot(slot) {
+ init_class_id(Class_BoxLock);
+ init_flags(Flag_rematerialize);
+ OptoReg::Name reg = OptoReg::stack2reg(_slot);
+ _inmask.Insert(reg);
+}
+
+//------------------------------cmp--------------------------------------------
+uint BoxLockNode::cmp( const Node &n ) const {
+ const BoxLockNode &bn = (const BoxLockNode &)n;
+ return bn._slot == _slot;
+}
+
+OptoReg::Name BoxLockNode::stack_slot(Node* box_node) {
+ // Chase down the BoxNode
+ while (!box_node->is_BoxLock()) {
+ // if (box_node->is_SpillCopy()) {
+ // Node *m = box_node->in(1);
+ // if (m->is_Mach() && m->as_Mach()->ideal_Opcode() == Op_StoreP) {
+ // box_node = m->in(m->as_Mach()->operand_index(2));
+ // continue;
+ // }
+ // }
+ assert(box_node->is_SpillCopy() || box_node->is_Phi(), "Bad spill of Lock.");
+ box_node = box_node->in(1);
+ }
+ return box_node->in_RegMask(0).find_first_elem();
+}
+
+//=============================================================================
+//-----------------------------hash--------------------------------------------
+uint FastLockNode::hash() const { return NO_HASH; }
+
+//------------------------------cmp--------------------------------------------
+uint FastLockNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//=============================================================================
+//-----------------------------hash--------------------------------------------
+uint FastUnlockNode::hash() const { return NO_HASH; }
+
+//------------------------------cmp--------------------------------------------
+uint FastUnlockNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//
+// Create a counter which counts the number of times this lock is acquired
+//
+void FastLockNode::create_lock_counter(JVMState* state) {
+ BiasedLockingNamedCounter* blnc = (BiasedLockingNamedCounter*)
+ OptoRuntime::new_named_counter(state, NamedCounter::BiasedLockingCounter);
+ _counters = blnc->counters();
+}
+
+//=============================================================================
+//------------------------------do_monitor_enter-------------------------------
+void Parse::do_monitor_enter() {
+ kill_dead_locals();
+
+ // Null check; get cast pointer.
+ Node *obj = do_null_check(peek(), T_OBJECT);
+ // Check for locking null object
+ if (stopped()) return;
+
+ // the monitor object is not part of debug info expression stack
+ pop();
+
+ // Insert a FastLockNode which takes as arguments the current thread pointer,
+ // the obj pointer & the address of the stack slot pair used for the lock.
+ shared_lock(obj);
+}
+
+//------------------------------do_monitor_exit--------------------------------
+void Parse::do_monitor_exit() {
+ kill_dead_locals();
+
+ pop(); // Pop oop to unlock
+ // Because monitors are guaranteed paired (else we bail out), we know
+ // the matching Lock for this Unlock. Hence we know there is no need
+ // for a null check on Unlock.
+ shared_unlock(map()->peek_monitor_box(), map()->peek_monitor_obj());
+}
diff --git a/src/share/vm/opto/locknode.hpp b/src/share/vm/opto/locknode.hpp
new file mode 100644
index 000000000..6b1a8883c
--- /dev/null
+++ b/src/share/vm/opto/locknode.hpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------BoxLockNode------------------------------------
+class BoxLockNode : public Node {
+public:
+ const int _slot;
+ RegMask _inmask;
+
+ BoxLockNode( int lock );
+ virtual int Opcode() const;
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const;
+ virtual uint size_of() const;
+ virtual uint hash() const { return Node::hash() + _slot; }
+ virtual uint cmp( const Node &n ) const;
+ virtual const class Type *bottom_type() const { return TypeRawPtr::BOTTOM; }
+ virtual uint ideal_reg() const { return Op_RegP; }
+
+ static OptoReg::Name stack_slot(Node* box_node);
+
+#ifndef PRODUCT
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+ virtual void dump_spec(outputStream *st) const { st->print(" Lock %d",_slot); }
+#endif
+};
+
+//------------------------------FastLockNode-----------------------------------
+class FastLockNode: public CmpNode {
+private:
+ BiasedLockingCounters* _counters;
+
+public:
+ FastLockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
+ init_req(0,ctrl);
+ init_class_id(Class_FastLock);
+ _counters = NULL;
+ }
+ Node* obj_node() const { return in(1); }
+ Node* box_node() const { return in(2); }
+
+ // FastLock and FastUnlockNode do not hash; we need one for each corresponding
+ // LockNode/UnLockNode to avoid creating Phis.
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+ void create_lock_counter(JVMState* s);
+ BiasedLockingCounters* counters() const { return _counters; }
+};
+
+
+//------------------------------FastUnlockNode---------------------------------
+class FastUnlockNode: public CmpNode {
+public:
+ FastUnlockNode(Node *ctrl, Node *oop, Node *box) : CmpNode(oop,box) {
+ init_req(0,ctrl);
+ init_class_id(Class_FastUnlock);
+ }
+ Node* obj_node() const { return in(1); }
+ Node* box_node() const { return in(2); }
+
+
+ // FastLock and FastUnlockNode do not hash; we need one for each corresponding
+ // LockNode/UnLockNode to avoid creating Phis.
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const { return TypeInt::CC; }
+ const Type *sub(const Type *t1, const Type *t2) const { return TypeInt::CC;}
+
+};
diff --git a/src/share/vm/opto/loopTransform.cpp b/src/share/vm/opto/loopTransform.cpp
new file mode 100644
index 000000000..3de4e0cd7
--- /dev/null
+++ b/src/share/vm/opto/loopTransform.cpp
@@ -0,0 +1,1729 @@
+/*
+ * Copyright 2000-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopTransform.cpp.incl"
+
+//------------------------------is_loop_exit-----------------------------------
+// Given an IfNode, return the loop-exiting projection or NULL if both
+// arms remain in the loop.
+Node *IdealLoopTree::is_loop_exit(Node *iff) const {
+ if( iff->outcnt() != 2 ) return NULL; // Ignore partially dead tests
+ PhaseIdealLoop *phase = _phase;
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of peeling.
+ if( !is_member(phase->get_loop( iff->raw_out(0) )) )
+ return iff->raw_out(0);
+ if( !is_member(phase->get_loop( iff->raw_out(1) )) )
+ return iff->raw_out(1);
+ return NULL;
+}
+
+
+//=============================================================================
+
+
+//------------------------------record_for_igvn----------------------------
+// Put loop body on igvn work list
+void IdealLoopTree::record_for_igvn() {
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body.at(i);
+ _phase->_igvn._worklist.push(n);
+ }
+}
+
+//------------------------------compute_profile_trip_cnt----------------------------
+// Compute loop trip count from profile data as
+// (backedge_count + loop_exit_count) / loop_exit_count
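+// Illustrative example (numbers are assumed, not taken from real profile data):
+// a loop whose backedge was taken 90 times and which exited 10 times is
+// estimated at (90 + 10) / 10 == 10 trips per entry.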
+void IdealLoopTree::compute_profile_trip_cnt( PhaseIdealLoop *phase ) {
+ if (!_head->is_CountedLoop()) {
+ return;
+ }
+ CountedLoopNode* head = _head->as_CountedLoop();
+ if (head->profile_trip_cnt() != COUNT_UNKNOWN) {
+ return; // Already computed
+ }
+ float trip_cnt = (float)max_jint; // default is big
+
+ Node* back = head->in(LoopNode::LoopBackControl);
+ while (back != head) {
+ if ((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
+ back->in(0) &&
+ back->in(0)->is_If() &&
+ back->in(0)->as_If()->_fcnt != COUNT_UNKNOWN &&
+ back->in(0)->as_If()->_prob != PROB_UNKNOWN) {
+ break;
+ }
+ back = phase->idom(back);
+ }
+ if (back != head) {
+ assert((back->Opcode() == Op_IfTrue || back->Opcode() == Op_IfFalse) &&
+ back->in(0), "if-projection exists");
+ IfNode* back_if = back->in(0)->as_If();
+ float loop_back_cnt = back_if->_fcnt * back_if->_prob;
+
+ // Now compute a loop exit count
+ float loop_exit_cnt = 0.0f;
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body[i];
+ if( n->is_If() ) {
+ IfNode *iff = n->as_If();
+ if( iff->_fcnt != COUNT_UNKNOWN && iff->_prob != PROB_UNKNOWN ) {
+ Node *exit = is_loop_exit(iff);
+ if( exit ) {
+ float exit_prob = iff->_prob;
+ if (exit->Opcode() == Op_IfFalse) exit_prob = 1.0 - exit_prob;
+ if (exit_prob > PROB_MIN) {
+ float exit_cnt = iff->_fcnt * exit_prob;
+ loop_exit_cnt += exit_cnt;
+ }
+ }
+ }
+ }
+ }
+ if (loop_exit_cnt > 0.0f) {
+ trip_cnt = (loop_back_cnt + loop_exit_cnt) / loop_exit_cnt;
+ } else {
+ // No exit count, so use the backedge count as the trip-count estimate.
+ trip_cnt = loop_back_cnt;
+ }
+ }
+#ifndef PRODUCT
+ if (TraceProfileTripCount) {
+ tty->print_cr("compute_profile_trip_cnt lp: %d cnt: %f\n", head->_idx, trip_cnt);
+ }
+#endif
+ head->set_profile_trip_cnt(trip_cnt);
+}
+
+//---------------------is_invariant_addition-----------------------------
+// Return nonzero index of invariant operand for an Add or Sub
+// of (nonconstant) invariant and variant values. Helper for reassociate_invariants.
+int IdealLoopTree::is_invariant_addition(Node* n, PhaseIdealLoop *phase) {
+ int op = n->Opcode();
+ if (op == Op_AddI || op == Op_SubI) {
+ bool in1_invar = this->is_invariant(n->in(1));
+ bool in2_invar = this->is_invariant(n->in(2));
+ if (in1_invar && !in2_invar) return 1;
+ if (!in1_invar && in2_invar) return 2;
+ }
+ return 0;
+}
+
+//---------------------reassociate_add_sub-----------------------------
+// Reassociate invariant add and subtract expressions:
+//
+// inv1 + (x + inv2) => ( inv1 + inv2) + x
+// (x + inv2) + inv1 => ( inv1 + inv2) + x
+// inv1 + (x - inv2) => ( inv1 - inv2) + x
+// inv1 - (inv2 - x) => ( inv1 - inv2) + x
+// (x + inv2) - inv1 => (-inv1 + inv2) + x
+// (x - inv2) + inv1 => ( inv1 - inv2) + x
+// (x - inv2) - inv1 => (-inv1 - inv2) + x
+// inv1 + (inv2 - x) => ( inv1 + inv2) - x
+// inv1 - (x - inv2) => ( inv1 + inv2) - x
+// (inv2 - x) + inv1 => ( inv1 + inv2) - x
+// (inv2 - x) - inv1 => (-inv1 + inv2) - x
+// inv1 - (x + inv2) => ( inv1 - inv2) - x
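+//
+// For illustration (hypothetical loop-invariant values 'base' and 'off'):
+// inside the loop, base + (i + off) becomes (base + off) + i, so the invariant
+// addition (base + off) can be hoisted out of the loop body.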
+//
+Node* IdealLoopTree::reassociate_add_sub(Node* n1, PhaseIdealLoop *phase) {
+ if (!n1->is_Add() && !n1->is_Sub() || n1->outcnt() == 0) return NULL;
+ if (is_invariant(n1)) return NULL;
+ int inv1_idx = is_invariant_addition(n1, phase);
+ if (!inv1_idx) return NULL;
+ // Don't mess with adds of constants (igvn moves them to the expression tree root).
+ if (n1->is_Add() && n1->in(2)->is_Con()) return NULL;
+ Node* inv1 = n1->in(inv1_idx);
+ Node* n2 = n1->in(3 - inv1_idx);
+ int inv2_idx = is_invariant_addition(n2, phase);
+ if (!inv2_idx) return NULL;
+ Node* x = n2->in(3 - inv2_idx);
+ Node* inv2 = n2->in(inv2_idx);
+
+ bool neg_x = n2->is_Sub() && inv2_idx == 1;
+ bool neg_inv2 = n2->is_Sub() && inv2_idx == 2;
+ bool neg_inv1 = n1->is_Sub() && inv1_idx == 2;
+ if (n1->is_Sub() && inv1_idx == 1) {
+ neg_x = !neg_x;
+ neg_inv2 = !neg_inv2;
+ }
+ Node* inv1_c = phase->get_ctrl(inv1);
+ Node* inv2_c = phase->get_ctrl(inv2);
+ Node* n_inv1;
+ if (neg_inv1) {
+ Node *zero = phase->_igvn.intcon(0);
+ phase->set_ctrl(zero, phase->C->root());
+ n_inv1 = new (phase->C, 3) SubINode(zero, inv1);
+ phase->register_new_node(n_inv1, inv1_c);
+ } else {
+ n_inv1 = inv1;
+ }
+ Node* inv;
+ if (neg_inv2) {
+ inv = new (phase->C, 3) SubINode(n_inv1, inv2);
+ } else {
+ inv = new (phase->C, 3) AddINode(n_inv1, inv2);
+ }
+ phase->register_new_node(inv, phase->get_early_ctrl(inv));
+
+ Node* addx;
+ if (neg_x) {
+ addx = new (phase->C, 3) SubINode(inv, x);
+ } else {
+ addx = new (phase->C, 3) AddINode(x, inv);
+ }
+ phase->register_new_node(addx, phase->get_ctrl(x));
+ phase->_igvn.hash_delete(n1);
+ phase->_igvn.subsume_node(n1, addx);
+ return addx;
+}
+
+//---------------------reassociate_invariants-----------------------------
+// Reassociate invariant expressions:
+void IdealLoopTree::reassociate_invariants(PhaseIdealLoop *phase) {
+ for (int i = _body.size() - 1; i >= 0; i--) {
+ Node *n = _body.at(i);
+ for (int j = 0; j < 5; j++) {
+ Node* nn = reassociate_add_sub(n, phase);
+ if (nn == NULL) break;
+ n = nn; // again
+ };
+ }
+}
+
+//------------------------------policy_peeling---------------------------------
+// Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
+// make some loop-invariant test (usually a null-check) happen before the loop.
+bool IdealLoopTree::policy_peeling( PhaseIdealLoop *phase ) const {
+ Node *test = ((IdealLoopTree*)this)->tail();
+ int body_size = ((IdealLoopTree*)this)->_body.size();
+ int uniq = phase->C->unique();
+ // Peeling does loop cloning which can result in O(N^2) node construction
+ if( body_size > 255 /* Prevent overflow for large body_size */
+ || (body_size * body_size + uniq > MaxNodeLimit) ) {
+ return false; // too large to safely clone
+ }
+ while( test != _head ) { // Scan till run off top of loop
+ if( test->is_If() ) { // Test?
+ Node *ctrl = phase->get_ctrl(test->in(1));
+ if (ctrl->is_top())
+ return false; // Found dead test on live IF? No peeling!
+ // Standard IF only has one input value to check for loop invariance
+ assert( test->Opcode() == Op_If || test->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+ // Condition is not a member of this loop?
+ if( !is_member(phase->get_loop(ctrl)) &&
+ is_loop_exit(test) )
+ return true; // Found reason to peel!
+ }
+ // Walk up dominators to loop _head looking for test which is
+ // executed on every path thru loop.
+ test = phase->idom(test);
+ }
+ return false;
+}
+
+//------------------------------peeled_dom_test_elim---------------------------
+// If we got the effect of peeling, either by actually peeling or by making
+// a pre-loop which must execute at least once, we can remove all
+// loop-invariant dominated tests in the main body.
+void PhaseIdealLoop::peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new ) {
+ bool progress = true;
+ while( progress ) {
+ progress = false; // Reset for next iteration
+ Node *prev = loop->_head->in(LoopNode::LoopBackControl);//loop->tail();
+ Node *test = prev->in(0);
+ while( test != loop->_head ) { // Scan till run off top of loop
+
+ int p_op = prev->Opcode();
+ if( (p_op == Op_IfFalse || p_op == Op_IfTrue) &&
+ test->is_If() && // Test?
+ !test->in(1)->is_Con() && // And not already obvious?
+ // Condition is not a member of this loop?
+ !loop->is_member(get_loop(get_ctrl(test->in(1))))){
+ // Walk loop body looking for instances of this test
+ for( uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ if( n->is_If() && n->in(1) == test->in(1) /*&& n != loop->tail()->in(0)*/ ) {
+ // IfNode was dominated by version in peeled loop body
+ progress = true;
+ dominated_by( old_new[prev->_idx], n );
+ }
+ }
+ }
+ prev = test;
+ test = idom(test);
+ } // End of scan tests in loop
+
+ } // End of while( progress )
+}
+
+//------------------------------do_peeling-------------------------------------
+// Peel the first iteration of the given loop.
+// Step 1: Clone the loop body. The clone becomes the peeled iteration.
+// The pre-loop illegally has 2 control users (old & new loops).
+// Step 2: Make the old-loop fall-in edges point to the peeled iteration.
+// Do this by making the old-loop fall-in edges act as if they came
+// around the loopback from the prior iteration (follow the old-loop
+// backedges) and then map to the new peeled iteration. This leaves
+// the pre-loop with only 1 user (the new peeled iteration), but the
+// peeled-loop backedge has 2 users.
+// Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
+// extra backedge user.
+void PhaseIdealLoop::do_peeling( IdealLoopTree *loop, Node_List &old_new ) {
+
+ C->set_major_progress();
+ // Peeling a 'main' loop in a pre/main/post situation obfuscates the
+ // 'pre' loop from the main and the 'pre' can no longer have its
+ // iterations adjusted. Therefore, we need to declare this loop as
+ // no longer a 'main' loop; it will need new pre and post loops before
+ // we can do further RCE.
+ Node *h = loop->_head;
+ if( h->is_CountedLoop() ) {
+ CountedLoopNode *cl = h->as_CountedLoop();
+ assert(cl->trip_count() > 0, "peeling a fully unrolled loop");
+ cl->set_trip_count(cl->trip_count() - 1);
+ if( cl->is_main_loop() ) {
+ cl->set_normal_loop();
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Peeling a 'main' loop; resetting to 'normal' ");
+ loop->dump_head();
+ }
+#endif
+ }
+ }
+
+ // Step 1: Clone the loop body. The clone becomes the peeled iteration.
+ // The pre-loop illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dom_depth(loop->_head) );
+
+
+ // Step 2: Make the old-loop fall-in edges point to the peeled iteration.
+ // Do this by making the old-loop fall-in edges act as if they came
+ // around the loopback from the prior iteration (follow the old-loop
+ // backedges) and then map to the new peeled iteration. This leaves
+ // the pre-loop with only 1 user (the new peeled iteration), but the
+ // peeled-loop backedge has 2 users.
+ for (DUIterator_Fast jmax, j = loop->_head->fast_outs(jmax); j < jmax; j++) {
+ Node* old = loop->_head->fast_out(j);
+ if( old->in(0) == loop->_head && old->req() == 3 &&
+ (old->is_Loop() || old->is_Phi()) ) {
+ Node *new_exit_value = old_new[old->in(LoopNode::LoopBackControl)->_idx];
+ if( !new_exit_value ) // Backedge value is ALSO loop invariant?
+ // Then loop body backedge value remains the same.
+ new_exit_value = old->in(LoopNode::LoopBackControl);
+ _igvn.hash_delete(old);
+ old->set_req(LoopNode::EntryControl, new_exit_value);
+ }
+ }
+
+
+ // Step 3: Cut the backedge on the clone (so it's not a loop) and remove the
+ // extra backedge user.
+ Node *nnn = old_new[loop->_head->_idx];
+ _igvn.hash_delete(nnn);
+ nnn->set_req(LoopNode::LoopBackControl, C->top());
+ for (DUIterator_Fast j2max, j2 = nnn->fast_outs(j2max); j2 < j2max; j2++) {
+ Node* use = nnn->fast_out(j2);
+ if( use->in(0) == nnn && use->req() == 3 && use->is_Phi() ) {
+ _igvn.hash_delete(use);
+ use->set_req(LoopNode::LoopBackControl, C->top());
+ }
+ }
+
+
+ // Step 4: Correct dom-depth info. Set to loop-head depth.
+ int dd = dom_depth(loop->_head);
+ set_idom(loop->_head, loop->_head->in(1), dd);
+ for (uint j3 = 0; j3 < loop->_body.size(); j3++) {
+ Node *old = loop->_body.at(j3);
+ Node *nnn = old_new[old->_idx];
+ if (!has_ctrl(nnn))
+ set_idom(nnn, idom(nnn), dd-1);
+ // While we're at it, remove any SafePoints from the peeled code
+ if( old->Opcode() == Op_SafePoint ) {
+ Node *nnn = old_new[old->_idx];
+ lazy_replace(nnn,nnn->in(TypeFunc::Control));
+ }
+ }
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop,old_new);
+
+ loop->record_for_igvn();
+}
+
+//------------------------------policy_maximally_unroll------------------------
+// Return TRUE if the loop has a small, known, constant trip count and should be maximally unrolled.
+bool IdealLoopTree::policy_maximally_unroll( PhaseIdealLoop *phase ) const {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ assert( cl->is_normal_loop(), "" );
+
+ Node *init_n = cl->init_trip();
+ Node *limit_n = cl->limit();
+
+ // Non-constant bounds
+ if( init_n == NULL || !init_n->is_Con() ||
+ limit_n == NULL || !limit_n->is_Con() ||
+ // protect against stride not being a constant
+ !cl->stride_is_con() ) {
+ return false;
+ }
+ int init = init_n->get_int();
+ int limit = limit_n->get_int();
+ int span = limit - init;
+ int stride = cl->stride_con();
+
+ if (init >= limit || stride > span) {
+ // Return false (do not maximally unroll); the regular unroll/peel
+ // route will make a small mess which CCP will fold away.
+ return false;
+ }
+ uint trip_count = span/stride; // trip_count can be greater than 2 Gig.
+ assert( (int)trip_count*stride == span, "must divide evenly" );
+
+ // Real policy: if we maximally unroll, does it get too big?
+ // Allow the unrolled mess to get larger than standard loop
+ // size. After all, it will no longer be a loop.
+ uint body_size = _body.size();
+ uint unroll_limit = (uint)LoopUnrollLimit * 4;
+ assert( (intx)unroll_limit == LoopUnrollLimit * 4, "LoopUnrollLimit must fit in 32bits");
+ cl->set_trip_count(trip_count);
+ if( trip_count <= unroll_limit && body_size <= unroll_limit ) {
+ uint new_body_size = body_size * trip_count;
+ if (new_body_size <= unroll_limit &&
+ body_size == new_body_size / trip_count &&
+ // Unrolling can result in a large amount of node construction
+ new_body_size < MaxNodeLimit - phase->C->unique()) {
+ return true; // maximally unroll
+ }
+ }
+
+ return false; // Do not maximally unroll
+}
+
+
+//------------------------------policy_unroll----------------------------------
+// Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
+// the loop is a CountedLoop and the body is small enough.
+bool IdealLoopTree::policy_unroll( PhaseIdealLoop *phase ) const {
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ assert( cl->is_normal_loop() || cl->is_main_loop(), "" );
+
+ // protect against stride not being a constant
+ if( !cl->stride_is_con() ) return false;
+
+ // protect against over-unrolling
+ if( cl->trip_count() <= 1 ) return false;
+
+ int future_unroll_ct = cl->unrolled_count() * 2;
+
+ // Don't unroll if the next round of unrolling would push us
+ // over the expected trip count of the loop. One is subtracted
+ // from the expected trip count because the pre-loop normally
+ // executes 1 iteration.
+ if (UnrollLimitForProfileCheck > 0 &&
+ cl->profile_trip_cnt() != COUNT_UNKNOWN &&
+ future_unroll_ct > UnrollLimitForProfileCheck &&
+ (float)future_unroll_ct > cl->profile_trip_cnt() - 1.0) {
+ return false;
+ }
+
+ // When unroll count is greater than LoopUnrollMin, don't unroll if:
+ // the residual iterations are more than 10% of the trip count
+ // and rounds of "unroll,optimize" are not making significant progress
+ // Progress defined as current size less than 20% larger than previous size.
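+ // Example with assumed numbers: unrolling from 4x to 8x against a profiled
+ // trip count of 60 leaves up to 7 residual iterations, and 7*10 > 60; if the
+ // body has also grown past 1.2x its pre-unroll node count, unrolling stops here.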
+ if (UseSuperWord && cl->node_count_before_unroll() > 0 &&
+ future_unroll_ct > LoopUnrollMin &&
+ (future_unroll_ct - 1) * 10.0 > cl->profile_trip_cnt() &&
+ 1.2 * cl->node_count_before_unroll() < (double)_body.size()) {
+ return false;
+ }
+
+ Node *init_n = cl->init_trip();
+ Node *limit_n = cl->limit();
+ // Non-constant bounds.
+ // Protect against over-unrolling when init or/and limit are not constant
+ // (so that trip_count's init value is maxint) but iv range is known.
+ if( init_n == NULL || !init_n->is_Con() ||
+ limit_n == NULL || !limit_n->is_Con() ) {
+ Node* phi = cl->phi();
+ if( phi != NULL ) {
+ assert(phi->is_Phi() && phi->in(0) == _head, "Counted loop should have iv phi.");
+ const TypeInt* iv_type = phase->_igvn.type(phi)->is_int();
+ int next_stride = cl->stride_con() * 2; // stride after this unroll
+ if( next_stride > 0 ) {
+ if( iv_type->_lo + next_stride <= iv_type->_lo || // overflow
+ iv_type->_lo + next_stride > iv_type->_hi ) {
+ return false; // over-unrolling
+ }
+ } else if( next_stride < 0 ) {
+ if( iv_type->_hi + next_stride >= iv_type->_hi || // overflow
+ iv_type->_hi + next_stride < iv_type->_lo ) {
+ return false; // over-unrolling
+ }
+ }
+ }
+ }
+
+ // Adjust body_size to determine if we unroll or not
+ uint body_size = _body.size();
+ // Key test to unroll CaffeineMark's Logic test
+ int xors_in_loop = 0;
+ // Also count ModL, DivL and MulL which expand mightily
+ for( uint k = 0; k < _body.size(); k++ ) {
+ switch( _body.at(k)->Opcode() ) {
+ case Op_XorI: xors_in_loop++; break; // CaffeineMark's Logic test
+ case Op_ModL: body_size += 30; break;
+ case Op_DivL: body_size += 30; break;
+ case Op_MulL: body_size += 10; break;
+ }
+ }
+
+ // Check for being too big
+ if( body_size > (uint)LoopUnrollLimit ) {
+ if( xors_in_loop >= 4 && body_size < (uint)LoopUnrollLimit*4) return true;
+ // Normal case: loop too big
+ return false;
+ }
+
+ // Check for stride being a small enough constant
+ if( abs(cl->stride_con()) > (1<<3) ) return false;
+
+ // Unroll once! (Each trip will soon do double iterations)
+ return true;
+}
+
+//------------------------------policy_align-----------------------------------
+// Return TRUE or FALSE if the loop should be cache-line aligned. Gather the
+// expression that does the alignment. Note that only one array base can be
+// aligned in a loop (unless the VM guarantees mutual alignment). Note that
+// if we vectorize short memory ops into longer memory ops, we may want to
+// increase alignment.
+bool IdealLoopTree::policy_align( PhaseIdealLoop *phase ) const {
+ return false;
+}
+
+//------------------------------policy_range_check-----------------------------
+// Return TRUE or FALSE if the loop should be range-check-eliminated.
+// Actually we do iteration-splitting, a more powerful form of RCE.
+bool IdealLoopTree::policy_range_check( PhaseIdealLoop *phase ) const {
+ if( !RangeCheckElimination ) return false;
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ // If we unrolled with no intention of doing RCE and we later
+ // changed our minds, we have no pre-loop. Either we need to
+ // make a new pre-loop, or we must disallow RCE.
+ if( cl->is_main_no_pre_loop() ) return false; // Disallowed for now.
+ Node *trip_counter = cl->phi();
+
+ // Check loop body for tests of trip-counter plus loop-invariant vs
+ // loop-invariant.
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *iff = _body[i];
+ if( iff->Opcode() == Op_If ) { // Test?
+
+ // Comparing trip+off vs limit
+ Node *bol = iff->in(1);
+ if( bol->req() != 2 ) continue; // dead constant test
+ Node *cmp = bol->in(1);
+
+ Node *rc_exp = cmp->in(1);
+ Node *limit = cmp->in(2);
+
+ Node *limit_c = phase->get_ctrl(limit);
+ if( limit_c == phase->C->top() )
+ return false; // Found dead test on live IF? No RCE!
+ if( is_member(phase->get_loop(limit_c) ) ) {
+ // Compare might have operands swapped; commute them
+ rc_exp = cmp->in(2);
+ limit = cmp->in(1);
+ limit_c = phase->get_ctrl(limit);
+ if( is_member(phase->get_loop(limit_c) ) )
+ continue; // Both inputs are loop varying; cannot RCE
+ }
+
+ if (!phase->is_scaled_iv_plus_offset(rc_exp, trip_counter, NULL, NULL)) {
+ continue;
+ }
+ // Yeah! Found a test like 'trip+off vs limit'
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of iteration splitting.
+ if( is_loop_exit(iff) )
+ return true; // Found reason to split iterations
+ } // End of is IF
+ }
+
+ return false;
+}
+
+//------------------------------policy_peel_only-------------------------------
+// Return TRUE or FALSE if the loop should NEVER be RCE'd or aligned. Useful
+// for unrolling loops with NO array accesses.
+bool IdealLoopTree::policy_peel_only( PhaseIdealLoop *phase ) const {
+
+ for( uint i = 0; i < _body.size(); i++ )
+ if( _body[i]->is_Mem() )
+ return false;
+
+ // No memory accesses at all!
+ return true;
+}
+
+//------------------------------clone_up_backedge_goo--------------------------
+// If Node n lives in the back_ctrl block and cannot float, we clone a private
+// version of n in the preheader_ctrl block and return that; otherwise return n.
+Node *PhaseIdealLoop::clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n ) {
+ if( get_ctrl(n) != back_ctrl ) return n;
+
+ Node *x = NULL; // If required, a clone of 'n'
+ // Check for 'n' being pinned in the backedge.
+ if( n->in(0) && n->in(0) == back_ctrl ) {
+ x = n->clone(); // Clone a copy of 'n' to preheader
+ x->set_req( 0, preheader_ctrl ); // Fix x's control input to preheader
+ }
+
+ // Recursively fix up any other input edges into x.
+ // If there are no changes we can just return 'n', otherwise
+ // we need to clone a private copy and change it.
+ for( uint i = 1; i < n->req(); i++ ) {
+ Node *g = clone_up_backedge_goo( back_ctrl, preheader_ctrl, n->in(i) );
+ if( g != n->in(i) ) {
+ if( !x )
+ x = n->clone();
+ x->set_req(i, g);
+ }
+ }
+ if( x ) { // x can legally float to pre-header location
+ register_new_node( x, preheader_ctrl );
+ return x;
+ } else { // raise n to cover LCA of uses
+ set_ctrl( n, find_non_split_ctrl(back_ctrl->in(0)) );
+ }
+ return n;
+}
+
+//------------------------------insert_pre_post_loops--------------------------
+// Insert pre and post loops. If peel_only is set, the pre-loop cannot have
+// more iterations added. It acts as a 'peel' only, no lower-bound RCE, no
+// alignment. Useful to unroll loops that do no array accesses.
+void PhaseIdealLoop::insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only ) {
+
+ C->set_major_progress();
+
+ // Find common pieces of the loop being guarded with pre & post loops
+ CountedLoopNode *main_head = loop->_head->as_CountedLoop();
+ assert( main_head->is_normal_loop(), "" );
+ CountedLoopEndNode *main_end = main_head->loopexit();
+ assert( main_end->outcnt() == 2, "1 true, 1 false path only" );
+ uint dd_main_head = dom_depth(main_head);
+ uint max = main_head->outcnt();
+
+ Node *pre_header= main_head->in(LoopNode::EntryControl);
+ Node *init = main_head->init_trip();
+ Node *incr = main_end ->incr();
+ Node *limit = main_end ->limit();
+ Node *stride = main_end ->stride();
+ Node *cmp = main_end ->cmp_node();
+ BoolTest::mask b_test = main_end->test_trip();
+
+ // Need only 1 user of 'bol' because I will be hacking the loop bounds.
+ Node *bol = main_end->in(CountedLoopEndNode::TestValue);
+ if( bol->outcnt() != 1 ) {
+ bol = bol->clone();
+ register_new_node(bol,main_end->in(CountedLoopEndNode::TestControl));
+ _igvn.hash_delete(main_end);
+ main_end->set_req(CountedLoopEndNode::TestValue, bol);
+ }
+ // Need only 1 user of 'cmp' because I will be hacking the loop bounds.
+ if( cmp->outcnt() != 1 ) {
+ cmp = cmp->clone();
+ register_new_node(cmp,main_end->in(CountedLoopEndNode::TestControl));
+ _igvn.hash_delete(bol);
+ bol->set_req(1, cmp);
+ }
+
+ //------------------------------
+ // Step A: Create Post-Loop.
+ Node* main_exit = main_end->proj_out(false);
+ assert( main_exit->Opcode() == Op_IfFalse, "" );
+ int dd_main_exit = dom_depth(main_exit);
+
+ // Step A1: Clone the loop body. The clone becomes the post-loop. The main
+ // loop pre-header illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dd_main_exit );
+ assert( old_new[main_end ->_idx]->Opcode() == Op_CountedLoopEnd, "" );
+ CountedLoopNode *post_head = old_new[main_head->_idx]->as_CountedLoop();
+ post_head->set_post_loop(main_head);
+
+ // Build the main-loop normal exit.
+ IfFalseNode *new_main_exit = new (C, 1) IfFalseNode(main_end);
+ _igvn.register_new_node_with_optimizer( new_main_exit );
+ set_idom(new_main_exit, main_end, dd_main_exit );
+ set_loop(new_main_exit, loop->_parent);
+
+ // Step A2: Build a zero-trip guard for the post-loop. After leaving the
+ // main-loop, the post-loop may not execute at all. We 'opaque' the incr
+ // (the main-loop trip-counter exit value) because we will be changing
+ // the exit value (via unrolling) so we cannot constant-fold away the zero
+ // trip guard until all unrolling is done.
+ Node *zer_opaq = new (C, 2) Opaque1Node(incr);
+ Node *zer_cmp = new (C, 3) CmpINode( zer_opaq, limit );
+ Node *zer_bol = new (C, 2) BoolNode( zer_cmp, b_test );
+ register_new_node( zer_opaq, new_main_exit );
+ register_new_node( zer_cmp , new_main_exit );
+ register_new_node( zer_bol , new_main_exit );
+
+ // Build the IfNode
+ IfNode *zer_iff = new (C, 2) IfNode( new_main_exit, zer_bol, PROB_FAIR, COUNT_UNKNOWN );
+ _igvn.register_new_node_with_optimizer( zer_iff );
+ set_idom(zer_iff, new_main_exit, dd_main_exit);
+ set_loop(zer_iff, loop->_parent);
+
+ // Plug in the false-path, taken if we need to skip post-loop
+ _igvn.hash_delete( main_exit );
+ main_exit->set_req(0, zer_iff);
+ _igvn._worklist.push(main_exit);
+ set_idom(main_exit, zer_iff, dd_main_exit);
+ set_idom(main_exit->unique_out(), zer_iff, dd_main_exit);
+ // Make the true-path, must enter the post loop
+ Node *zer_taken = new (C, 1) IfTrueNode( zer_iff );
+ _igvn.register_new_node_with_optimizer( zer_taken );
+ set_idom(zer_taken, zer_iff, dd_main_exit);
+ set_loop(zer_taken, loop->_parent);
+ // Plug in the true path
+ _igvn.hash_delete( post_head );
+ post_head->set_req(LoopNode::EntryControl, zer_taken);
+ set_idom(post_head, zer_taken, dd_main_exit);
+
+ // Step A3: Make the fall-in values to the post-loop come from the
+ // fall-out values of the main-loop.
+ for (DUIterator_Fast imax, i = main_head->fast_outs(imax); i < imax; i++) {
+ Node* main_phi = main_head->fast_out(i);
+ if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() >0 ) {
+ Node *post_phi = old_new[main_phi->_idx];
+ Node *fallmain = clone_up_backedge_goo(main_head->back_control(),
+ post_head->init_control(),
+ main_phi->in(LoopNode::LoopBackControl));
+ _igvn.hash_delete(post_phi);
+ post_phi->set_req( LoopNode::EntryControl, fallmain );
+ }
+ }
+
+ // Update local caches for next stanza
+ main_exit = new_main_exit;
+
+
+ //------------------------------
+ // Step B: Create Pre-Loop.
+
+ // Step B1: Clone the loop body. The clone becomes the pre-loop. The main
+ // loop pre-header illegally has 2 control users (old & new loops).
+ clone_loop( loop, old_new, dd_main_head );
+ CountedLoopNode* pre_head = old_new[main_head->_idx]->as_CountedLoop();
+ CountedLoopEndNode* pre_end = old_new[main_end ->_idx]->as_CountedLoopEnd();
+ pre_head->set_pre_loop(main_head);
+ Node *pre_incr = old_new[incr->_idx];
+
+ // Find the pre-loop normal exit.
+ Node* pre_exit = pre_end->proj_out(false);
+ assert( pre_exit->Opcode() == Op_IfFalse, "" );
+ IfFalseNode *new_pre_exit = new (C, 1) IfFalseNode(pre_end);
+ _igvn.register_new_node_with_optimizer( new_pre_exit );
+ set_idom(new_pre_exit, pre_end, dd_main_head);
+ set_loop(new_pre_exit, loop->_parent);
+
+ // Step B2: Build a zero-trip guard for the main-loop. After leaving the
+ // pre-loop, the main-loop may not execute at all. Later in life this
+ // zero-trip guard will become the minimum-trip guard when we unroll
+ // the main-loop.
+ Node *min_opaq = new (C, 2) Opaque1Node(limit);
+ Node *min_cmp = new (C, 3) CmpINode( pre_incr, min_opaq );
+ Node *min_bol = new (C, 2) BoolNode( min_cmp, b_test );
+ register_new_node( min_opaq, new_pre_exit );
+ register_new_node( min_cmp , new_pre_exit );
+ register_new_node( min_bol , new_pre_exit );
+
+ // Build the IfNode
+ IfNode *min_iff = new (C, 2) IfNode( new_pre_exit, min_bol, PROB_FAIR, COUNT_UNKNOWN );
+ _igvn.register_new_node_with_optimizer( min_iff );
+ set_idom(min_iff, new_pre_exit, dd_main_head);
+ set_loop(min_iff, loop->_parent);
+
+ // Plug in the false-path, taken if we need to skip main-loop
+ _igvn.hash_delete( pre_exit );
+ pre_exit->set_req(0, min_iff);
+ set_idom(pre_exit, min_iff, dd_main_head);
+ set_idom(pre_exit->unique_out(), min_iff, dd_main_head);
+ // Make the true-path, must enter the main loop
+ Node *min_taken = new (C, 1) IfTrueNode( min_iff );
+ _igvn.register_new_node_with_optimizer( min_taken );
+ set_idom(min_taken, min_iff, dd_main_head);
+ set_loop(min_taken, loop->_parent);
+ // Plug in the true path
+ _igvn.hash_delete( main_head );
+ main_head->set_req(LoopNode::EntryControl, min_taken);
+ set_idom(main_head, min_taken, dd_main_head);
+
+ // Step B3: Make the fall-in values to the main-loop come from the
+ // fall-out values of the pre-loop.
+ for (DUIterator_Fast i2max, i2 = main_head->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* main_phi = main_head->fast_out(i2);
+ if( main_phi->is_Phi() && main_phi->in(0) == main_head && main_phi->outcnt() > 0 ) {
+ Node *pre_phi = old_new[main_phi->_idx];
+ Node *fallpre = clone_up_backedge_goo(pre_head->back_control(),
+ main_head->init_control(),
+ pre_phi->in(LoopNode::LoopBackControl));
+ _igvn.hash_delete(main_phi);
+ main_phi->set_req( LoopNode::EntryControl, fallpre );
+ }
+ }
+
+ // Step B4: Shorten the pre-loop to run only 1 iteration (for now).
+ // RCE and alignment may change this later.
+ Node *cmp_end = pre_end->cmp_node();
+ assert( cmp_end->in(2) == limit, "" );
+ Node *pre_limit = new (C, 3) AddINode( init, stride );
+
+ // Save the original loop limit in this Opaque1 node for
+ // use by range check elimination.
+ Node *pre_opaq = new (C, 3) Opaque1Node(pre_limit, limit);
+
+ register_new_node( pre_limit, pre_head->in(0) );
+ register_new_node( pre_opaq , pre_head->in(0) );
+
+ // Since no other users of pre-loop compare, I can hack limit directly
+ assert( cmp_end->outcnt() == 1, "no other users" );
+ _igvn.hash_delete(cmp_end);
+ cmp_end->set_req(2, peel_only ? pre_limit : pre_opaq);
+
+ // Special case for not-equal loop bounds:
+ // Change pre loop test, main loop test, and the
+ // main loop guard test to use lt or gt depending on stride
+ // direction:
+ // positive stride use <
+ // negative stride use >
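+ // For example, a counted loop written as "for (int i = 0; i != n; i++)" has a
+ // positive stride, so its pre-loop, main-loop and guard tests are all
+ // rewritten here to use "i < n" (source-level view, for illustration only).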
+
+ if (pre_end->in(CountedLoopEndNode::TestValue)->as_Bool()->_test._test == BoolTest::ne) {
+
+ BoolTest::mask new_test = (main_end->stride_con() > 0) ? BoolTest::lt : BoolTest::gt;
+ // Modify pre loop end condition
+ Node* pre_bol = pre_end->in(CountedLoopEndNode::TestValue)->as_Bool();
+ BoolNode* new_bol0 = new (C, 2) BoolNode(pre_bol->in(1), new_test);
+ register_new_node( new_bol0, pre_head->in(0) );
+ _igvn.hash_delete(pre_end);
+ pre_end->set_req(CountedLoopEndNode::TestValue, new_bol0);
+ // Modify main loop guard condition
+ assert(min_iff->in(CountedLoopEndNode::TestValue) == min_bol, "guard okay");
+ BoolNode* new_bol1 = new (C, 2) BoolNode(min_bol->in(1), new_test);
+ register_new_node( new_bol1, new_pre_exit );
+ _igvn.hash_delete(min_iff);
+ min_iff->set_req(CountedLoopEndNode::TestValue, new_bol1);
+ // Modify main loop end condition
+ BoolNode* main_bol = main_end->in(CountedLoopEndNode::TestValue)->as_Bool();
+ BoolNode* new_bol2 = new (C, 2) BoolNode(main_bol->in(1), new_test);
+ register_new_node( new_bol2, main_end->in(CountedLoopEndNode::TestControl) );
+ _igvn.hash_delete(main_end);
+ main_end->set_req(CountedLoopEndNode::TestValue, new_bol2);
+ }
+
+ // Flag main loop
+ main_head->set_main_loop();
+ if( peel_only ) main_head->set_main_no_pre_loop();
+
+ // It's difficult to be precise about the trip-counts
+ // for the pre/post loops. They are usually very short,
+ // so guess that 4 trips is a reasonable value.
+ post_head->set_profile_trip_cnt(4.0);
+ pre_head->set_profile_trip_cnt(4.0);
+
+ // Now force out all loop-invariant dominating tests. The optimizer
+ // finds some, but we _know_ they are all useless.
+ peeled_dom_test_elim(loop,old_new);
+}
+
+//------------------------------is_invariant-----------------------------
+// Return true if n is invariant
+bool IdealLoopTree::is_invariant(Node* n) const {
+ Node *n_c = _phase->get_ctrl(n);
+ if (n_c->is_top()) return false;
+ return !is_member(_phase->get_loop(n_c));
+}
+
+
+//------------------------------do_unroll--------------------------------------
+// Unroll the loop body one step - make each trip do 2 iterations.
+void PhaseIdealLoop::do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip ) {
+ assert( LoopUnrollLimit, "" );
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Unrolling ");
+ loop->dump_head();
+ }
+#endif
+ CountedLoopNode *loop_head = loop->_head->as_CountedLoop();
+ CountedLoopEndNode *loop_end = loop_head->loopexit();
+ assert( loop_end, "" );
+
+ // Remember loop node count before unrolling to detect
+ // if rounds of unroll,optimize are making progress
+ loop_head->set_node_count_before_unroll(loop->_body.size());
+
+ Node *ctrl = loop_head->in(LoopNode::EntryControl);
+ Node *limit = loop_head->limit();
+ Node *init = loop_head->init_trip();
+ Node *strid = loop_head->stride();
+
+ Node *opaq = NULL;
+ if( adjust_min_trip ) { // If not maximally unrolling, need adjustment
+ assert( loop_head->is_main_loop(), "" );
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iff = ctrl->in(0);
+ assert( iff->Opcode() == Op_If, "" );
+ Node *bol = iff->in(1);
+ assert( bol->Opcode() == Op_Bool, "" );
+ Node *cmp = bol->in(1);
+ assert( cmp->Opcode() == Op_CmpI, "" );
+ opaq = cmp->in(2);
+ // Occasionally it's possible for a pre-loop Opaque1 node to be
+ // optimized away and then another round of loop opts attempted.
+ // We cannot optimize this particular loop in that case.
+ if( opaq->Opcode() != Op_Opaque1 )
+ return; // Cannot find pre-loop! Bail out!
+ }
+
+ C->set_major_progress();
+
+ // Adjust max trip count. The trip count is intentionally rounded
+ // down here (e.g. 15-> 7-> 3-> 1) because if we unwittingly over-unroll,
+ // the main, unrolled, part of the loop will never execute as it is protected
+ // by the min-trip test. See bug 4834191 for a case where we over-unrolled
+ // and later determined that part of the unrolled loop was dead.
+ loop_head->set_trip_count(loop_head->trip_count() / 2);
+
+ // Double the count of original iterations in the unrolled loop body.
+ loop_head->double_unrolled_count();
+
+ // -----------
+ // Step 2: Cut back the trip counter for an unroll amount of 2.
+ // Loop will normally trip (limit - init)/stride_con. Since it's a
+ // CountedLoop this is exact (stride divides limit-init exactly).
+ // We are going to double the loop body, so we want to knock off any
+ // odd iteration: (trip_cnt & ~1). Then back compute a new limit.
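+ // Worked example (assumed values): init=0, limit=15, stride=1 gives trip=15;
+ // 15 & ~1 == 14, so the new limit below becomes 0 + 14*1 == 14 and the leftover
+ // odd iteration runs outside the unrolled body (in the post-loop for a
+ // pre/main/post loop nest).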
+ Node *span = new (C, 3) SubINode( limit, init );
+ register_new_node( span, ctrl );
+ Node *trip = new (C, 3) DivINode( 0, span, strid );
+ register_new_node( trip, ctrl );
+ Node *mtwo = _igvn.intcon(-2);
+ set_ctrl(mtwo, C->root());
+ Node *rond = new (C, 3) AndINode( trip, mtwo );
+ register_new_node( rond, ctrl );
+ Node *spn2 = new (C, 3) MulINode( rond, strid );
+ register_new_node( spn2, ctrl );
+ Node *lim2 = new (C, 3) AddINode( spn2, init );
+ register_new_node( lim2, ctrl );
+
+ // Hammer in the new limit
+ Node *ctrl2 = loop_end->in(0);
+ Node *cmp2 = new (C, 3) CmpINode( loop_head->incr(), lim2 );
+ register_new_node( cmp2, ctrl2 );
+ Node *bol2 = new (C, 2) BoolNode( cmp2, loop_end->test_trip() );
+ register_new_node( bol2, ctrl2 );
+ _igvn.hash_delete(loop_end);
+ loop_end->set_req(CountedLoopEndNode::TestValue, bol2);
+
+ // Step 3: Find the min-trip test guaranteed before a 'main' loop.
+ // Make it a 1-trip test (means at least 2 trips).
+ if( adjust_min_trip ) {
+ // Guard test uses an 'opaque' node which is not shared. Hence I
+ // can edit its inputs directly. Hammer in the new limit for the
+ // minimum-trip guard.
+ assert( opaq->outcnt() == 1, "" );
+ _igvn.hash_delete(opaq);
+ opaq->set_req(1, lim2);
+ }
+
+ // ---------
+ // Step 4: Clone the loop body. Move it inside the loop. This loop body
+ // represents the odd iterations; since the loop trips an even number of
+ // times its backedge is never taken. Kill the backedge.
+ uint dd = dom_depth(loop_head);
+ clone_loop( loop, old_new, dd );
+
+ // Make backedges of the clone equal to backedges of the original.
+ // Make the fall-in from the original come from the fall-out of the clone.
+ for (DUIterator_Fast jmax, j = loop_head->fast_outs(jmax); j < jmax; j++) {
+ Node* phi = loop_head->fast_out(j);
+ if( phi->is_Phi() && phi->in(0) == loop_head && phi->outcnt() > 0 ) {
+ Node *newphi = old_new[phi->_idx];
+ _igvn.hash_delete( phi );
+ _igvn.hash_delete( newphi );
+
+ phi ->set_req(LoopNode:: EntryControl, newphi->in(LoopNode::LoopBackControl));
+ newphi->set_req(LoopNode::LoopBackControl, phi ->in(LoopNode::LoopBackControl));
+ phi ->set_req(LoopNode::LoopBackControl, C->top());
+ }
+ }
+ Node *clone_head = old_new[loop_head->_idx];
+ _igvn.hash_delete( clone_head );
+ loop_head ->set_req(LoopNode:: EntryControl, clone_head->in(LoopNode::LoopBackControl));
+ clone_head->set_req(LoopNode::LoopBackControl, loop_head ->in(LoopNode::LoopBackControl));
+ loop_head ->set_req(LoopNode::LoopBackControl, C->top());
+ loop->_head = clone_head; // New loop header
+
+ set_idom(loop_head, loop_head ->in(LoopNode::EntryControl), dd);
+ set_idom(clone_head, clone_head->in(LoopNode::EntryControl), dd);
+
+ // Kill the clone's backedge
+ Node *newcle = old_new[loop_end->_idx];
+ _igvn.hash_delete( newcle );
+ Node *one = _igvn.intcon(1);
+ set_ctrl(one, C->root());
+ newcle->set_req(1, one);
+ // Force clone into same loop body
+ uint max = loop->_body.size();
+ for( uint k = 0; k < max; k++ ) {
+ Node *old = loop->_body.at(k);
+ Node *nnn = old_new[old->_idx];
+ loop->_body.push(nnn);
+ if (!has_ctrl(old))
+ set_loop(nnn, loop);
+ }
+}
+
+//------------------------------do_maximally_unroll----------------------------
+
+void PhaseIdealLoop::do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new ) {
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ assert( cl->trip_count() > 0, "");
+
+ // If loop is tripping an odd number of times, peel odd iteration
+ if( (cl->trip_count() & 1) == 1 ) {
+ do_peeling( loop, old_new );
+ }
+
+ // The remaining trip count is now even. Double the loop body.
+ // Do not adjust pre-guards; they are not needed and do not exist.
+ if( cl->trip_count() > 0 ) {
+ do_unroll( loop, old_new, false );
+ }
+}
+
+//------------------------------dominates_backedge---------------------------------
+// Returns true if ctrl is executed on every complete iteration
+bool IdealLoopTree::dominates_backedge(Node* ctrl) {
+ assert(ctrl->is_CFG(), "must be control");
+ Node* backedge = _head->as_Loop()->in(LoopNode::LoopBackControl);
+ return _phase->dom_lca_internal(ctrl, backedge) == ctrl;
+}
+
+//------------------------------add_constraint---------------------------------
+// Constrain the main loop iterations so the condition:
+// scale_con * I + offset < limit
+// always holds true. That is, either increase the number of iterations in
+// the pre-loop or the post-loop until the condition holds true in the main
+// loop. Stride, scale, offset and limit are all loop invariant. Further,
+// stride and scale are constants (offset and limit often are).
+void PhaseIdealLoop::add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit ) {
+
+ // Compute "I :: (limit-offset)/scale_con"
+ Node *con = new (C, 3) SubINode( limit, offset );
+ register_new_node( con, pre_ctrl );
+ Node *scale = _igvn.intcon(scale_con);
+ set_ctrl(scale, C->root());
+ Node *X = new (C, 3) DivINode( 0, con, scale );
+ register_new_node( X, pre_ctrl );
+
+ // For positive stride, the pre-loop limit always uses a MAX function
+ // and the main loop a MIN function. For negative stride these are
+ // reversed.
+
+ // Also for positive stride*scale the affine function is increasing, so the
+ // pre-loop must check for underflow and the post-loop for overflow.
+ // Negative stride*scale reverses this; pre-loop checks for overflow and
+ // post-loop for underflow.
+ if( stride_con*scale_con > 0 ) {
+ // Compute I < (limit-offset)/scale_con
+ // Adjust main-loop last iteration to be MIN/MAX(main_loop,X)
+ *main_limit = (stride_con > 0)
+ ? (Node*)(new (C, 3) MinINode( *main_limit, X ))
+ : (Node*)(new (C, 3) MaxINode( *main_limit, X ));
+ register_new_node( *main_limit, pre_ctrl );
+
+ } else {
+ // Compute (limit-offset)/scale_con + SGN(-scale_con) <= I
+ // Add the negation of the main-loop constraint to the pre-loop.
+ // See footnote [++] below for a derivation of the limit expression.
+ Node *incr = _igvn.intcon(scale_con > 0 ? -1 : 1);
+ set_ctrl(incr, C->root());
+ Node *adj = new (C, 3) AddINode( X, incr );
+ register_new_node( adj, pre_ctrl );
+ *pre_limit = (scale_con > 0)
+ ? (Node*)new (C, 3) MinINode( *pre_limit, adj )
+ : (Node*)new (C, 3) MaxINode( *pre_limit, adj );
+ register_new_node( *pre_limit, pre_ctrl );
+
+// [++] Here's the algebra that justifies the pre-loop limit expression:
+//
+// NOT( scale_con * I + offset < limit )
+// ==
+// scale_con * I + offset >= limit
+// ==
+// SGN(scale_con) * I >= (limit-offset)/|scale_con|
+// ==
+// (limit-offset)/|scale_con| <= I * SGN(scale_con)
+// ==
+// (limit-offset)/|scale_con|-1 < I * SGN(scale_con)
+// ==
+// ( if (scale_con > 0) /*common case*/
+// (limit-offset)/scale_con - 1 < I
+// else
+// (limit-offset)/scale_con + 1 > I
+// )
+// ==
+// ( if (scale_con > 0) /*common case*/
+// (limit-offset)/scale_con + SGN(-scale_con) < I
+// else
+// (limit-offset)/scale_con + SGN(-scale_con) > I
+// )
+ }
+}
+
+
+//------------------------------is_scaled_iv---------------------------------
+// Return true if exp is a constant times an induction var
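+// (e.g. it matches "iv", "3*iv", "iv*3", or "iv << 2", the last with scale 4)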
+bool PhaseIdealLoop::is_scaled_iv(Node* exp, Node* iv, int* p_scale) {
+ if (exp == iv) {
+ if (p_scale != NULL) {
+ *p_scale = 1;
+ }
+ return true;
+ }
+ int opc = exp->Opcode();
+ if (opc == Op_MulI) {
+ if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = exp->in(2)->get_int();
+ }
+ return true;
+ }
+ if (exp->in(2) == iv && exp->in(1)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = exp->in(1)->get_int();
+ }
+ return true;
+ }
+ } else if (opc == Op_LShiftI) {
+ if (exp->in(1) == iv && exp->in(2)->is_Con()) {
+ if (p_scale != NULL) {
+ *p_scale = 1 << exp->in(2)->get_int();
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+//-----------------------------is_scaled_iv_plus_offset------------------------------
+// Return true if exp is a simple induction variable expression: k1*iv + (invar + k2)
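+// (e.g. it matches "2*iv + invar" with scale 2, "2*iv - invar", and
+// "invar - 2*iv", the last reported with scale -2)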
+bool PhaseIdealLoop::is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth) {
+ if (is_scaled_iv(exp, iv, p_scale)) {
+ if (p_offset != NULL) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ *p_offset = zero;
+ }
+ return true;
+ }
+ int opc = exp->Opcode();
+ if (opc == Op_AddI) {
+ if (is_scaled_iv(exp->in(1), iv, p_scale)) {
+ if (p_offset != NULL) {
+ *p_offset = exp->in(2);
+ }
+ return true;
+ }
+ if (exp->in(2)->is_Con()) {
+ Node* offset2 = NULL;
+ if (depth < 2 &&
+ is_scaled_iv_plus_offset(exp->in(1), iv, p_scale,
+ p_offset != NULL ? &offset2 : NULL, depth+1)) {
+ if (p_offset != NULL) {
+ Node *ctrl_off2 = get_ctrl(offset2);
+ Node* offset = new (C, 3) AddINode(offset2, exp->in(2));
+ register_new_node(offset, ctrl_off2);
+ *p_offset = offset;
+ }
+ return true;
+ }
+ }
+ } else if (opc == Op_SubI) {
+ if (is_scaled_iv(exp->in(1), iv, p_scale)) {
+ if (p_offset != NULL) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ Node *ctrl_off = get_ctrl(exp->in(2));
+ Node* offset = new (C, 3) SubINode(zero, exp->in(2));
+ register_new_node(offset, ctrl_off);
+ *p_offset = offset;
+ }
+ return true;
+ }
+ if (is_scaled_iv(exp->in(2), iv, p_scale)) {
+ if (p_offset != NULL) {
+ *p_scale *= -1;
+ *p_offset = exp->in(1);
+ }
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------do_range_check---------------------------------
+// Eliminate range-checks and other trip-counter vs loop-invariant tests.
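+// Illustrative example: in "for (int i = 0; i < n; i++) a[i] = 0;" the bounds
+// check "0 <= i < a.length" is a trip-counter vs loop-invariant test. The
+// pre- and main-loop limits are constrained below so the main loop provably
+// stays in range and its check is removed; the pre- and post-loops keep the
+// full checks.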
+void PhaseIdealLoop::do_range_check( IdealLoopTree *loop, Node_List &old_new ) {
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Range Check Elimination ");
+ loop->dump_head();
+ }
+#endif
+ assert( RangeCheckElimination, "" );
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ assert( cl->is_main_loop(), "" );
+
+ // Find the trip counter; we are iteration splitting based on it
+ Node *trip_counter = cl->phi();
+ // Find the main loop limit; we will trim its iterations
+ // so they never trip the end tests
+ Node *main_limit = cl->limit();
+ // Find the pre-loop limit; we will expand its iterations so
+ // they never trip the low tests.
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iffm = ctrl->in(0);
+ assert( iffm->Opcode() == Op_If, "" );
+ Node *p_f = iffm->in(0);
+ assert( p_f->Opcode() == Op_IfFalse, "" );
+ CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
+ assert( pre_end->loopnode()->is_pre_loop(), "" );
+ Node *pre_opaq1 = pre_end->limit();
+ // Occasionally it's possible for a pre-loop Opaque1 node to be
+ // optimized away and then another round of loop opts attempted.
+ // We cannot optimize this particular loop in that case.
+ if( pre_opaq1->Opcode() != Op_Opaque1 )
+ return;
+ Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
+ Node *pre_limit = pre_opaq->in(1);
+
+ // Where do we put new limit calculations
+ Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+
+ // Ensure the original loop limit is available from the
+ // pre-loop Opaque1 node.
+ Node *orig_limit = pre_opaq->original_loop_limit();
+ if( orig_limit == NULL || _igvn.type(orig_limit) == Type::TOP )
+ return;
+
+ // Need to find the main-loop zero-trip guard
+ Node *bolzm = iffm->in(1);
+ assert( bolzm->Opcode() == Op_Bool, "" );
+ Node *cmpzm = bolzm->in(1);
+ assert( cmpzm->is_Cmp(), "" );
+ Node *opqzm = cmpzm->in(2);
+ if( opqzm->Opcode() != Op_Opaque1 )
+ return;
+ assert( opqzm->in(1) == main_limit, "do not understand situation" );
+
+ // Must know if it's a count-up or count-down loop
+
+ // protect against stride not being a constant
+ if ( !cl->stride_is_con() ) {
+ return;
+ }
+ int stride_con = cl->stride_con();
+ Node *zero = _igvn.intcon(0);
+ Node *one = _igvn.intcon(1);
+ set_ctrl(zero, C->root());
+ set_ctrl(one, C->root());
+
+ // Range checks that do not dominate the loop backedge (i.e. are
+ // conditionally executed) can lengthen the pre-loop limit beyond
+ // the original loop limit. To prevent this, the pre-loop limit is
+ // MINed with the original loop limit for stride > 0 (MAXed for
+ // stride < 0) whenever some range check (rc) is conditionally
+ // executed.
+ bool conditional_rc = false;
+
+ // Check loop body for tests of trip-counter plus loop-invariant vs
+ // loop-invariant.
+ for( uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *iff = loop->_body[i];
+ if( iff->Opcode() == Op_If ) { // Test?
+
+ // Test is an IfNode, has 2 projections. If BOTH are in the loop
+ // we need loop unswitching instead of iteration splitting.
+ Node *exit = loop->is_loop_exit(iff);
+ if( !exit ) continue;
+ int flip = (exit->Opcode() == Op_IfTrue) ? 1 : 0;
+
+ // Get boolean condition to test
+ Node *i1 = iff->in(1);
+ if( !i1->is_Bool() ) continue;
+ BoolNode *bol = i1->as_Bool();
+ BoolTest b_test = bol->_test;
+ // Flip sense of test if exit condition is flipped
+ if( flip )
+ b_test = b_test.negate();
+
+ // Get compare
+ Node *cmp = bol->in(1);
+
+ // Look for trip_counter + offset vs limit
+ Node *rc_exp = cmp->in(1);
+ Node *limit = cmp->in(2);
+ jint scale_con= 1; // Assume trip counter not scaled
+
+ Node *limit_c = get_ctrl(limit);
+ if( loop->is_member(get_loop(limit_c) ) ) {
+ // Compare might have operands swapped; commute them
+ b_test = b_test.commute();
+ rc_exp = cmp->in(2);
+ limit = cmp->in(1);
+ limit_c = get_ctrl(limit);
+ if( loop->is_member(get_loop(limit_c) ) )
+ continue; // Both inputs are loop varying; cannot RCE
+ }
+ // Here we know 'limit' is loop invariant
+
+ // 'limit' may be pinned below the zero-trip test (probably from a
+ // previous round of RCE), in which case it can't be used in the
+ // zero-trip test expression, which must occur before the zero test's if.
+ if( limit_c == ctrl ) {
+ continue; // Don't rce this check but continue looking for other candidates.
+ }
+
+ // Check for scaled induction variable plus an offset
+ Node *offset = NULL;
+
+ if (!is_scaled_iv_plus_offset(rc_exp, trip_counter, &scale_con, &offset)) {
+ continue;
+ }
+
+ Node *offset_c = get_ctrl(offset);
+ if( loop->is_member( get_loop(offset_c) ) )
+ continue; // Offset is not really loop invariant
+ // Here we know 'offset' is loop invariant.
+
+ // As above for the 'limit', the 'offset' may be pinned below the
+ // zero-trip test.
+ if( offset_c == ctrl ) {
+ continue; // Don't rce this check but continue looking for other candidates.
+ }
+
+ // At this point we have the expression as:
+ // scale_con * trip_counter + offset :: limit
+ // where scale_con, offset and limit are loop invariant. Trip_counter
+ // monotonically increases by stride_con, a constant. Both (or either)
+ // stride_con and scale_con can be negative, which will flip the
+ // sense of the test.
+
+ // Adjust pre and main loop limits to guard the correct iteration set
+ if( cmp->Opcode() == Op_CmpU ) {// Unsigned compare is really 2 tests
+ if( b_test._test == BoolTest::lt ) { // Range checks always use lt
+ // The overflow limit: scale*I+offset < limit
+ add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+ // The underflow limit: 0 <= scale*I+offset.
+ // Some math yields: -scale*I-(offset+1) < 0
+ Node *plus_one = new (C, 3) AddINode( offset, one );
+ register_new_node( plus_one, pre_ctrl );
+ Node *neg_offset = new (C, 3) SubINode( zero, plus_one );
+ register_new_node( neg_offset, pre_ctrl );
+ add_constraint( stride_con, -scale_con, neg_offset, zero, pre_ctrl, &pre_limit, &main_limit );
+ if (!conditional_rc) {
+ conditional_rc = !loop->dominates_backedge(iff);
+ }
+ } else {
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("missed RCE opportunity");
+#endif
+ continue; // In release mode, ignore it
+ }
+ } else { // Otherwise work on normal compares
+ switch( b_test._test ) {
+ case BoolTest::ge: // Convert X >= Y to -X <= -Y
+ scale_con = -scale_con;
+ offset = new (C, 3) SubINode( zero, offset );
+ register_new_node( offset, pre_ctrl );
+ limit = new (C, 3) SubINode( zero, limit );
+ register_new_node( limit, pre_ctrl );
+ // Fall into LE case
+ case BoolTest::le: // Convert X <= Y to X < Y+1
+ limit = new (C, 3) AddINode( limit, one );
+ register_new_node( limit, pre_ctrl );
+ // Fall into LT case
+ case BoolTest::lt:
+ add_constraint( stride_con, scale_con, offset, limit, pre_ctrl, &pre_limit, &main_limit );
+ if (!conditional_rc) {
+ conditional_rc = !loop->dominates_backedge(iff);
+ }
+ break;
+ default:
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("missed RCE opportunity");
+#endif
+ continue; // Unhandled case
+ }
+ }
+
+ // Kill the eliminated test
+ C->set_major_progress();
+ Node *kill_con = _igvn.intcon( 1-flip );
+ set_ctrl(kill_con, C->root());
+ _igvn.hash_delete(iff);
+ iff->set_req(1, kill_con);
+ _igvn._worklist.push(iff);
+ // Find surviving projection
+ assert(iff->is_If(), "");
+ ProjNode* dp = ((IfNode*)iff)->proj_out(1-flip);
+ // Find loads off the surviving projection; remove their control edge
+ for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+ Node* cd = dp->fast_out(i); // Control-dependent node
+ if( cd->is_Load() ) { // Loads can now float around in the loop
+ _igvn.hash_delete(cd);
+ // Allow the load to float around in the loop, or before it
+ // but NOT before the pre-loop.
+ cd->set_req(0, ctrl); // ctrl, not NULL
+ _igvn._worklist.push(cd);
+ --i;
+ --imax;
+ }
+ }
+
+ } // End of is IF
+
+ }
+
+ // Update loop limits
+ if (conditional_rc) {
+ pre_limit = (stride_con > 0) ? (Node*)new (C,3) MinINode(pre_limit, orig_limit)
+ : (Node*)new (C,3) MaxINode(pre_limit, orig_limit);
+ register_new_node(pre_limit, pre_ctrl);
+ }
+ _igvn.hash_delete(pre_opaq);
+ pre_opaq->set_req(1, pre_limit);
+
+ // Note: we are making the main loop limit no longer precise;
+ // need to round up based on stride.
+ if( stride_con != 1 && stride_con != -1 ) { // Cutout for common case
+ // "Standard" round-up logic: ([main_limit-init+(y-1)]/y)*y+init
+ // Hopefully, compiler will optimize for powers of 2.
+ Node *ctrl = get_ctrl(main_limit);
+ Node *stride = cl->stride();
+ Node *init = cl->init_trip();
+ Node *span = new (C, 3) SubINode(main_limit,init);
+ register_new_node(span,ctrl);
+ Node *rndup = _igvn.intcon(stride_con + ((stride_con>0)?-1:1));
+ Node *add = new (C, 3) AddINode(span,rndup);
+ register_new_node(add,ctrl);
+ Node *div = new (C, 3) DivINode(0,add,stride);
+ register_new_node(div,ctrl);
+ Node *mul = new (C, 3) MulINode(div,stride);
+ register_new_node(mul,ctrl);
+ Node *newlim = new (C, 3) AddINode(mul,init);
+ register_new_node(newlim,ctrl);
+ main_limit = newlim;
+ }
+
+ Node *main_cle = cl->loopexit();
+ Node *main_bol = main_cle->in(1);
+ // Hacking loop bounds; need private copies of exit test
+ if( main_bol->outcnt() > 1 ) {// BoolNode shared?
+ _igvn.hash_delete(main_cle);
+ main_bol = main_bol->clone();// Clone a private BoolNode
+ register_new_node( main_bol, main_cle->in(0) );
+ main_cle->set_req(1,main_bol);
+ }
+ Node *main_cmp = main_bol->in(1);
+ if( main_cmp->outcnt() > 1 ) { // CmpNode shared?
+ _igvn.hash_delete(main_bol);
+ main_cmp = main_cmp->clone();// Clone a private CmpNode
+ register_new_node( main_cmp, main_cle->in(0) );
+ main_bol->set_req(1,main_cmp);
+ }
+ // Hack the now-private loop bounds
+ _igvn.hash_delete(main_cmp);
+ main_cmp->set_req(2, main_limit);
+ _igvn._worklist.push(main_cmp);
+ // The OpaqueNode is unshared by design
+ _igvn.hash_delete(opqzm);
+ assert( opqzm->outcnt() == 1, "cannot hack shared node" );
+ opqzm->set_req(1,main_limit);
+ _igvn._worklist.push(opqzm);
+}
+
+//------------------------------DCE_loop_body----------------------------------
+// Remove simplistic dead code from loop body
+void IdealLoopTree::DCE_loop_body() {
+ for( uint i = 0; i < _body.size(); i++ )
+ if( _body.at(i)->outcnt() == 0 )
+ _body.map( i--, _body.pop() );
+}
+
+
+//------------------------------adjust_loop_exit_prob--------------------------
+// Look for loop-exit tests with the 50/50 (or worse) guesses from the parsing stage.
+// Replace with a 1-in-10 exit guess.
+void IdealLoopTree::adjust_loop_exit_prob( PhaseIdealLoop *phase ) {
+ Node *test = tail();
+ while( test != _head ) {
+ uint top = test->Opcode();
+ if( top == Op_IfTrue || top == Op_IfFalse ) {
+ int test_con = ((ProjNode*)test)->_con;
+ assert(top == (uint)(test_con? Op_IfTrue: Op_IfFalse), "sanity");
+ IfNode *iff = test->in(0)->as_If();
+ if( iff->outcnt() == 2 ) { // Ignore dead tests
+ Node *bol = iff->in(1);
+ if( bol && bol->req() > 1 && bol->in(1) &&
+ ((bol->in(1)->Opcode() == Op_StorePConditional ) ||
+ (bol->in(1)->Opcode() == Op_StoreLConditional ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapI ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapL ) ||
+ (bol->in(1)->Opcode() == Op_CompareAndSwapP )))
+ return; // Allocation loops RARELY take backedge
+ // Find the OTHER exit path from the IF
+ Node* ex = iff->proj_out(1-test_con);
+ float p = iff->_prob;
+ if( !phase->is_member( this, ex ) && iff->_fcnt == COUNT_UNKNOWN ) {
+ if( top == Op_IfTrue ) {
+ if( p < (PROB_FAIR + PROB_UNLIKELY_MAG(3))) {
+ iff->_prob = PROB_STATIC_FREQUENT;
+ }
+ } else {
+ if( p > (PROB_FAIR - PROB_UNLIKELY_MAG(3))) {
+ iff->_prob = PROB_STATIC_INFREQUENT;
+ }
+ }
+ }
+ }
+ }
+ test = phase->idom(test);
+ }
+}
+
+
+//------------------------------policy_do_remove_empty_loop--------------------
+// Micro-benchmark spamming. Policy is to always remove empty loops.
+// The 'DO' part is to replace the trip counter with the value it will
+// have on the last iteration. This will break the loop.
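+// Illustrative example: "for (int i = 0; i < n; i++) { }" has an empty body;
+// the phi for i is replaced by its last-iteration value (limit - stride),
+// the backedge test folds away, and the loop collapses.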
+bool IdealLoopTree::policy_do_remove_empty_loop( PhaseIdealLoop *phase ) {
+ // Body must be no larger than that of an empty loop
+ if( _body.size() > 7/*number of nodes in an empty loop*/ ) return false;
+
+ if( !_head->is_CountedLoop() ) return false; // Dead loop
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ if( !cl->loopexit() ) return false; // Malformed loop
+ if( !phase->is_member(this,phase->get_ctrl(cl->loopexit()->in(CountedLoopEndNode::TestValue)) ) )
+ return false; // Infinite loop
+#ifndef PRODUCT
+ if( PrintOpto )
+ tty->print_cr("Removing empty loop");
+#endif
+#ifdef ASSERT
+ // Ensure only one phi which is the iv.
+ Node* iv = NULL;
+ for (DUIterator_Fast imax, i = cl->fast_outs(imax); i < imax; i++) {
+ Node* n = cl->fast_out(i);
+ if (n->Opcode() == Op_Phi) {
+ assert(iv == NULL, "Too many phis" );
+ iv = n;
+ }
+ }
+ assert(iv == cl->phi(), "Wrong phi" );
+#endif
+ // Replace the phi at loop head with the final value of the last
+ // iteration. Then the CountedLoopEnd will collapse (backedge never
+ // taken) and all loop-invariant uses of the exit values will be correct.
+ Node *phi = cl->phi();
+ Node *final = new (phase->C, 3) SubINode( cl->limit(), cl->stride() );
+ phase->register_new_node(final,cl->in(LoopNode::EntryControl));
+ phase->_igvn.hash_delete(phi);
+ phase->_igvn.subsume_node(phi,final);
+ phase->C->set_major_progress();
+ return true;
+}
+
+
+//=============================================================================
+//------------------------------iteration_split_impl---------------------------
+void IdealLoopTree::iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new ) {
+ // Check and remove empty loops (spam micro-benchmarks)
+ if( policy_do_remove_empty_loop(phase) )
+ return; // Here we removed an empty loop
+
+ bool should_peel = policy_peeling(phase); // Should we peel?
+
+ bool should_unswitch = policy_unswitching(phase);
+
+ // Non-counted loops may be peeled; exactly 1 iteration is peeled.
+ // This removes loop-invariant tests (usually null checks).
+ if( !_head->is_CountedLoop() ) { // Non-counted loop
+ if (PartialPeelLoop && phase->partial_peel(this, old_new)) {
+ return;
+ }
+ if( should_peel ) { // Should we peel?
+#ifndef PRODUCT
+ if (PrintOpto) tty->print_cr("should_peel");
+#endif
+ phase->do_peeling(this,old_new);
+ } else if( should_unswitch ) {
+ phase->do_unswitching(this, old_new);
+ }
+ return;
+ }
+ CountedLoopNode *cl = _head->as_CountedLoop();
+
+ if( !cl->loopexit() ) return; // Ignore various kinds of broken loops
+
+ // Do nothing special to pre- and post- loops
+ if( cl->is_pre_loop() || cl->is_post_loop() ) return;
+
+ // Compute loop trip count from profile data
+ compute_profile_trip_cnt(phase);
+
+ // Before attempting fancy unrolling, RCE or alignment, see if we want
+ // to completely unroll this loop or do loop unswitching.
+ if( cl->is_normal_loop() ) {
+ bool should_maximally_unroll = policy_maximally_unroll(phase);
+ if( should_maximally_unroll ) {
+ // Here we did some unrolling and peeling. Eventually we will
+ // completely unroll this loop and it will no longer be a loop.
+ phase->do_maximally_unroll(this,old_new);
+ return;
+ }
+ if (should_unswitch) {
+ phase->do_unswitching(this, old_new);
+ return;
+ }
+ }
+
+
+ // Counted loops may be peeled, may need some iterations run up
+ // front for RCE, and may want to align loop refs to a cache
+ // line. Thus we clone a full loop up front whose trip count is
+ // at least 1 (if peeling), but may be several more.
+
+ // The main loop will start cache-line aligned with at least 1
+ // iteration of the unrolled body (zero-trip test required) and
+ // will have some range checks removed.
+
+ // A post-loop will finish any odd iterations (leftover after
+ // unrolling), plus any needed for RCE purposes.
+
+ bool should_unroll = policy_unroll(phase);
+
+ bool should_rce = policy_range_check(phase);
+
+ bool should_align = policy_align(phase);
+
+ // If not RCE'ing (iteration splitting) or Aligning, then we do not
+ // need a pre-loop. We may still need to peel an initial iteration but
+ // we will not be needing an unknown number of pre-iterations.
+ //
+ // Basically, if may_rce_align reports FALSE first time through,
+ // we will not be able to later do RCE or Aligning on this loop.
+ bool may_rce_align = !policy_peel_only(phase) || should_rce || should_align;
+
+ // If we have any of these conditions (RCE, alignment, unrolling) met, then
+ // we switch to the pre-/main-/post-loop model. This model also covers
+ // peeling.
+ if( should_rce || should_align || should_unroll ) {
+ if( cl->is_normal_loop() ) // Convert to 'pre/main/post' loops
+ phase->insert_pre_post_loops(this,old_new, !may_rce_align);
+
+ // Adjust the pre- and main-loop limits to let the pre and post loops run
+ // with full checks, but the main-loop with no checks. Remove said
+ // checks from the main body.
+ if( should_rce )
+ phase->do_range_check(this,old_new);
+
+ // Double loop body for unrolling. Adjust the minimum-trip test (will do
+ // twice as many iterations as before) and the main body limit (only do
+ // an even number of trips). If we are peeling, we might enable some RCE
+ // and we'd rather unroll the post-RCE'd loop SO... do not unroll if
+ // peeling.
+ if( should_unroll && !should_peel )
+ phase->do_unroll(this,old_new, true);
+
+ // Adjust the pre-loop limits to align the main body
+ // iterations.
+ if( should_align )
+ Unimplemented();
+
+ } else { // Else we have an unchanged counted loop
+ if( should_peel ) // Might want to peel but do nothing else
+ phase->do_peeling(this,old_new);
+ }
+}
+
+
+//=============================================================================
+//------------------------------iteration_split--------------------------------
+void IdealLoopTree::iteration_split( PhaseIdealLoop *phase, Node_List &old_new ) {
+ // Recursively iteration split nested loops
+ if( _child ) _child->iteration_split( phase, old_new );
+
+ // Clean out prior deadwood
+ DCE_loop_body();
+
+
+ // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
+ // Replace with a 1-in-10 exit guess.
+ if( _parent /*not the root loop*/ &&
+ !_irreducible &&
+ // Also ignore the occasional dead backedge
+ !tail()->is_top() ) {
+ adjust_loop_exit_prob(phase);
+ }
+
+
+ // Gate unrolling, RCE and peeling efforts.
+ if( !_child && // If not an inner loop, do not split
+ !_irreducible &&
+ !tail()->is_top() ) { // Also ignore the occasional dead backedge
+ if (!_has_call) {
+ iteration_split_impl( phase, old_new );
+ } else if (policy_unswitching(phase)) {
+ phase->do_unswitching(this, old_new);
+ }
+ }
+
+ // Minor offset re-organization to remove loop-fallout uses of
+ // trip counter.
+ if( _head->is_CountedLoop() ) phase->reorg_offsets( this );
+ if( _next ) _next->iteration_split( phase, old_new );
+}
diff --git a/src/share/vm/opto/loopUnswitch.cpp b/src/share/vm/opto/loopUnswitch.cpp
new file mode 100644
index 000000000..fcba517e8
--- /dev/null
+++ b/src/share/vm/opto/loopUnswitch.cpp
@@ -0,0 +1,237 @@
+/*
+ * Copyright 2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopUnswitch.cpp.incl"
+
+//================= Loop Unswitching =====================
+//
+// orig: transformed:
+// if (invariant-test) then
+// loop loop
+// stmt1 stmt1
+// if (invariant-test) then stmt2
+// stmt2 stmt4
+// else endloop
+// stmt3 else
+// endif loop [clone]
+// stmt4 stmt1 [clone]
+// endloop stmt3
+// stmt4 [clone]
+// endloop
+// endif
+//
+// Note: the "else" clause may be empty
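+//
+// Roughly, at the source level (illustrative):
+// for (...) { if (cond) a(); else b(); } // cond loop-invariant
+// becomes
+// if (cond) { for (...) a(); } else { for (...) b(); }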
+
+//------------------------------policy_unswitching-----------------------------
+// Return TRUE or FALSE if the loop should be unswitched
+// (i.e. clone the loop with an invariant test that does not exit the loop)
+bool IdealLoopTree::policy_unswitching( PhaseIdealLoop *phase ) const {
+ if( !LoopUnswitching ) {
+ return false;
+ }
+ uint nodes_left = MaxNodeLimit - phase->C->unique();
+ if (2 * _body.size() > nodes_left) {
+ return false; // Too speculative if running low on nodes.
+ }
+ LoopNode* head = _head->as_Loop();
+ if (head->unswitch_count() + 1 > head->unswitch_max()) {
+ return false;
+ }
+ return phase->find_unswitching_candidate(this) != NULL;
+}
+
+//------------------------------find_unswitching_candidate-----------------------------
+// Find candidate "if" for unswitching
+IfNode* PhaseIdealLoop::find_unswitching_candidate(const IdealLoopTree *loop) const {
+
+ // Find first invariant test that doesn't exit the loop
+ LoopNode *head = loop->_head->as_Loop();
+ IfNode* unswitch_iff = NULL;
+ Node* n = head->in(LoopNode::LoopBackControl);
+ while (n != head) {
+ Node* n_dom = idom(n);
+ if (n->is_Region()) {
+ if (n_dom->is_If()) {
+ IfNode* iff = n_dom->as_If();
+ if (iff->in(1)->is_Bool()) {
+ BoolNode* bol = iff->in(1)->as_Bool();
+ if (bol->in(1)->is_Cmp()) {
+ // If condition is invariant and not a loop exit,
+ // then found reason to unswitch.
+ if (loop->is_invariant(bol) && !loop->is_loop_exit(iff)) {
+ unswitch_iff = iff;
+ }
+ }
+ }
+ }
+ }
+ n = n_dom;
+ }
+ return unswitch_iff;
+}
+
+//------------------------------do_unswitching-----------------------------
+// Clone loop with an invariant test (that does not exit) and
+// insert a clone of the test that selects which version to
+// execute.
+void PhaseIdealLoop::do_unswitching (IdealLoopTree *loop, Node_List &old_new) {
+
+ // Find first invariant test that doesn't exit the loop
+ LoopNode *head = loop->_head->as_Loop();
+
+ IfNode* unswitch_iff = find_unswitching_candidate((const IdealLoopTree *)loop);
+ assert(unswitch_iff != NULL, "should be at least one");
+
+ // Need to revert back to normal loop
+ if (head->is_CountedLoop() && !head->as_CountedLoop()->is_normal_loop()) {
+ head->as_CountedLoop()->set_normal_loop();
+ }
+
+ ProjNode* proj_true = create_slow_version_of_loop(loop, old_new);
+
+ assert(proj_true->is_IfTrue() && proj_true->unique_ctrl_out() == head, "by construction");
+
+ // Increment unswitch count
+ LoopNode* head_clone = old_new[head->_idx]->as_Loop();
+ int nct = head->unswitch_count() + 1;
+ head->set_unswitch_count(nct);
+ head_clone->set_unswitch_count(nct);
+
+ // Add test to new "if" outside of loop
+ IfNode* invar_iff = proj_true->in(0)->as_If();
+ Node* invar_iff_c = invar_iff->in(0);
+ BoolNode* bol = unswitch_iff->in(1)->as_Bool();
+ invar_iff->set_req(1, bol);
+ invar_iff->_prob = unswitch_iff->_prob;
+
+ ProjNode* proj_false = invar_iff->proj_out(0)->as_Proj();
+
+ // Hoist invariant casts out of each loop to the appropriate
+ // control projection.
+
+ Node_List worklist;
+
+ for (DUIterator_Fast imax, i = unswitch_iff->fast_outs(imax); i < imax; i++) {
+ ProjNode* proj= unswitch_iff->fast_out(i)->as_Proj();
+ // Copy to a worklist for easier manipulation
+ for (DUIterator_Fast jmax, j = proj->fast_outs(jmax); j < jmax; j++) {
+ Node* use = proj->fast_out(j);
+ if (use->Opcode() == Op_CheckCastPP && loop->is_invariant(use->in(1))) {
+ worklist.push(use);
+ }
+ }
+ ProjNode* invar_proj = invar_iff->proj_out(proj->_con)->as_Proj();
+ while (worklist.size() > 0) {
+ Node* use = worklist.pop();
+ Node* nuse = use->clone();
+ nuse->set_req(0, invar_proj);
+ _igvn.hash_delete(use);
+ use->set_req(1, nuse);
+ _igvn._worklist.push(use);
+ register_new_node(nuse, invar_proj);
+ // Same for the clone
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use_clone);
+ use_clone->set_req(1, nuse);
+ _igvn._worklist.push(use_clone);
+ }
+ }
+
+ // Hardwire the control paths in the loops into if(true) and if(false)
+ _igvn.hash_delete(unswitch_iff);
+ short_circuit_if(unswitch_iff, proj_true);
+ _igvn._worklist.push(unswitch_iff);
+
+ IfNode* unswitch_iff_clone = old_new[unswitch_iff->_idx]->as_If();
+ _igvn.hash_delete(unswitch_iff_clone);
+ short_circuit_if(unswitch_iff_clone, proj_false);
+ _igvn._worklist.push(unswitch_iff_clone);
+
+ // Reoptimize loops
+ loop->record_for_igvn();
+ for(int i = loop->_body.size() - 1; i >= 0 ; i--) {
+ Node *n = loop->_body[i];
+ Node *n_clone = old_new[n->_idx];
+ _igvn._worklist.push(n_clone);
+ }
+
+#ifndef PRODUCT
+ if (TraceLoopUnswitching) {
+ tty->print_cr("Loop unswitching orig: %d @ %d new: %d @ %d",
+ head->_idx, unswitch_iff->_idx,
+ old_new[head->_idx]->_idx, unswitch_iff_clone->_idx);
+ }
+#endif
+
+ C->set_major_progress();
+}
+
+//-------------------------create_slow_version_of_loop------------------------
+// Create a slow version of the loop by cloning the loop
+// and inserting an if to select fast-slow versions.
+// Return control projection of the entry to the fast version.
+ProjNode* PhaseIdealLoop::create_slow_version_of_loop(IdealLoopTree *loop,
+ Node_List &old_new) {
+ LoopNode* head = loop->_head->as_Loop();
+ Node* entry = head->in(LoopNode::EntryControl);
+ _igvn.hash_delete(entry);
+ _igvn._worklist.push(entry);
+ IdealLoopTree* outer_loop = loop->_parent;
+
+ Node *cont = _igvn.intcon(1);
+ set_ctrl(cont, C->root());
+ Node* opq = new (C, 2) Opaque1Node(cont);
+ register_node(opq, outer_loop, entry, dom_depth(entry));
+ Node *bol = new (C, 2) Conv2BNode(opq);
+ register_node(bol, outer_loop, entry, dom_depth(entry));
+ IfNode* iff = new (C, 2) IfNode(entry, bol, PROB_MAX, COUNT_UNKNOWN);
+ register_node(iff, outer_loop, entry, dom_depth(entry));
+ ProjNode* iffast = new (C, 1) IfTrueNode(iff);
+ register_node(iffast, outer_loop, iff, dom_depth(iff));
+ ProjNode* ifslow = new (C, 1) IfFalseNode(iff);
+ register_node(ifslow, outer_loop, iff, dom_depth(iff));
+
+ // Clone the loop body. The clone becomes the fast loop. The
+ // original pre-header will (illegally) have 2 control users (old & new loops).
+ clone_loop(loop, old_new, dom_depth(head), iff);
+ assert(old_new[head->_idx]->is_Loop(), "" );
+
+ // Fast (true) control
+ _igvn.hash_delete(head);
+ head->set_req(LoopNode::EntryControl, iffast);
+ set_idom(head, iffast, dom_depth(head));
+ _igvn._worklist.push(head);
+
+ // Slow (false) control
+ LoopNode* slow_head = old_new[head->_idx]->as_Loop();
+ _igvn.hash_delete(slow_head);
+ slow_head->set_req(LoopNode::EntryControl, ifslow);
+ set_idom(slow_head, ifslow, dom_depth(slow_head));
+ _igvn._worklist.push(slow_head);
+
+ recompute_dom_depth();
+
+ return iffast;
+}
diff --git a/src/share/vm/opto/loopnode.cpp b/src/share/vm/opto/loopnode.cpp
new file mode 100644
index 000000000..938862f0f
--- /dev/null
+++ b/src/share/vm/opto/loopnode.cpp
@@ -0,0 +1,2886 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopnode.cpp.incl"
+
+//=============================================================================
+//------------------------------is_loop_iv-------------------------------------
+// Determine if a node is a counted-loop induction variable.
+// The method is declared in node.hpp.
+const Node* Node::is_loop_iv() const {
+ if (this->is_Phi() && !this->as_Phi()->is_copy() &&
+ this->as_Phi()->region()->is_CountedLoop() &&
+ this->as_Phi()->region()->as_CountedLoop()->phi() == this) {
+ return this;
+ } else {
+ return NULL;
+ }
+}
+
+//=============================================================================
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void LoopNode::dump_spec(outputStream *st) const {
+ if( is_inner_loop () ) st->print( "inner " );
+ if( is_partial_peel_loop () ) st->print( "partial_peel " );
+ if( partial_peel_has_failed () ) st->print( "partial_peel_failed " );
+}
+#endif
+
+//------------------------------get_early_ctrl---------------------------------
+// Compute earliest legal control
+Node *PhaseIdealLoop::get_early_ctrl( Node *n ) {
+ assert( !n->is_Phi() && !n->is_CFG(), "this code only handles data nodes" );
+ uint i;
+ Node *early;
+ if( n->in(0) ) {
+ early = n->in(0);
+ if( !early->is_CFG() ) // Might be a non-CFG multi-def
+ early = get_ctrl(early); // So treat input as a straight data input
+ i = 1;
+ } else {
+ early = get_ctrl(n->in(1));
+ i = 2;
+ }
+ uint e_d = dom_depth(early);
+ assert( early, "" );
+ for( ; i < n->req(); i++ ) {
+ Node *cin = get_ctrl(n->in(i));
+ assert( cin, "" );
+ // Keep deepest dominator depth
+ uint c_d = dom_depth(cin);
+ if( c_d > e_d ) { // Deeper guy?
+ early = cin; // Keep deepest found so far
+ e_d = c_d;
+ } else if( c_d == e_d && // Same depth?
+ early != cin ) { // If not equal, must use slower algorithm
+ // If same depth but not equal, one _must_ dominate the other
+ // and we want the deeper (i.e., dominated) guy.
+ Node *n1 = early;
+ Node *n2 = cin;
+ while( 1 ) {
+ n1 = idom(n1); // Walk up until break cycle
+ n2 = idom(n2);
+ if( n1 == cin || // Walked early up to cin
+ dom_depth(n2) < c_d )
+ break; // early is deeper; keep him
+ if( n2 == early || // Walked cin up to early
+ dom_depth(n1) < c_d ) {
+ early = cin; // cin is deeper; keep him
+ break;
+ }
+ }
+ e_d = dom_depth(early); // Reset depth register cache
+ }
+ }
+
+ // Return earliest legal location
+ assert(early == find_non_split_ctrl(early), "unexpected early control");
+
+ return early;
+}
+
+//------------------------------set_early_ctrl---------------------------------
+// Set earliest legal control
+void PhaseIdealLoop::set_early_ctrl( Node *n ) {
+ Node *early = get_early_ctrl(n);
+
+ // Record earliest legal location
+ set_ctrl(n, early);
+}
+
+//------------------------------set_subtree_ctrl-------------------------------
+// set missing _ctrl entries on new nodes
+void PhaseIdealLoop::set_subtree_ctrl( Node *n ) {
+ // Already set? Get out.
+ if( _nodes[n->_idx] ) return;
+ // Recursively set _nodes array to indicate where the Node goes
+ uint i;
+ for( i = 0; i < n->req(); ++i ) {
+ Node *m = n->in(i);
+ if( m && m != C->root() )
+ set_subtree_ctrl( m );
+ }
+
+ // Fixup self
+ set_early_ctrl( n );
+}
+
+//------------------------------is_counted_loop--------------------------------
+Node *PhaseIdealLoop::is_counted_loop( Node *x, IdealLoopTree *loop ) {
+ PhaseGVN *gvn = &_igvn;
+
+ // Counted loop head must be a good RegionNode with only 3 non-NULL
+ // control input edges: Self, Entry, LoopBack.
+ if ( x->in(LoopNode::Self) == NULL || x->req() != 3 )
+ return NULL;
+
+ Node *init_control = x->in(LoopNode::EntryControl);
+ Node *back_control = x->in(LoopNode::LoopBackControl);
+ if( init_control == NULL || back_control == NULL ) // Partially dead
+ return NULL;
+ // Must also check for TOP when looking for a dead loop
+ if( init_control->is_top() || back_control->is_top() )
+ return NULL;
+
+ // Allow funny placement of Safepoint
+ if( back_control->Opcode() == Op_SafePoint )
+ back_control = back_control->in(TypeFunc::Control);
+
+ // Controlling test for loop
+ Node *iftrue = back_control;
+ uint iftrue_op = iftrue->Opcode();
+ if( iftrue_op != Op_IfTrue &&
+ iftrue_op != Op_IfFalse )
+ // I have a weird back-control. Probably the loop-exit test is in
+ // the middle of the loop and I am looking at some trailing control-flow
+ // merge point. To fix this I would have to partially peel the loop.
+ return NULL; // Obscure back-control
+
+ // Get boolean guarding loop-back test
+ Node *iff = iftrue->in(0);
+ if( get_loop(iff) != loop || !iff->in(1)->is_Bool() ) return NULL;
+ BoolNode *test = iff->in(1)->as_Bool();
+ BoolTest::mask bt = test->_test._test;
+ float cl_prob = iff->as_If()->_prob;
+ if( iftrue_op == Op_IfFalse ) {
+ bt = BoolTest(bt).negate();
+ cl_prob = 1.0 - cl_prob;
+ }
+ // Get backedge compare
+ Node *cmp = test->in(1);
+ int cmp_op = cmp->Opcode();
+ if( cmp_op != Op_CmpI )
+ return NULL; // Avoid pointer & float compares
+
+ // Find the trip-counter increment & limit. Limit must be loop invariant.
+ Node *incr = cmp->in(1);
+ Node *limit = cmp->in(2);
+
+ // ---------
+ // need 'loop()' test to tell if limit is loop invariant
+ // ---------
+
+ if( !is_member( loop, get_ctrl(incr) ) ) { // Swapped trip counter and limit?
+ Node *tmp = incr; // Then reverse order into the CmpI
+ incr = limit;
+ limit = tmp;
+ bt = BoolTest(bt).commute(); // And commute the exit test
+ }
+ if( is_member( loop, get_ctrl(limit) ) ) // Limit must be loop-invariant
+ return NULL;
+
+ // Trip-counter increment must be commutative & associative.
+ uint incr_op = incr->Opcode();
+ if( incr_op == Op_Phi && incr->req() == 3 ) {
+ incr = incr->in(2); // Assume incr is on backedge of Phi
+ incr_op = incr->Opcode();
+ }
+ Node* trunc1 = NULL;
+ Node* trunc2 = NULL;
+ const TypeInt* iv_trunc_t = NULL;
+ if (!(incr = CountedLoopNode::match_incr_with_optional_truncation(incr, &trunc1, &trunc2, &iv_trunc_t))) {
+ return NULL; // Funny increment opcode
+ }
+
+ // Get merge point
+ Node *xphi = incr->in(1);
+ Node *stride = incr->in(2);
+ if( !stride->is_Con() ) { // Oops, swap these
+ if( !xphi->is_Con() ) // Is the other guy a constant?
+ return NULL; // Nope, unknown stride, bail out
+ Node *tmp = xphi; // 'incr' is commutative, so ok to swap
+ xphi = stride;
+ stride = tmp;
+ }
+ //if( loop(xphi) != l) return NULL;// Merge point is in inner loop??
+ if( !xphi->is_Phi() ) return NULL; // Too much math on the trip counter
+ PhiNode *phi = xphi->as_Phi();
+
+ // Stride must be constant
+ const Type *stride_t = stride->bottom_type();
+ int stride_con = stride_t->is_int()->get_con();
+ assert( stride_con, "missed some peephole opt" );
+
+ // Phi must be of loop header; backedge must wrap to increment
+ if( phi->region() != x ) return NULL;
+ if( (trunc1 == NULL && phi->in(LoopNode::LoopBackControl) != incr) ||
+ (trunc1 != NULL && phi->in(LoopNode::LoopBackControl) != trunc1) ) {
+ return NULL;
+ }
+ Node *init_trip = phi->in(LoopNode::EntryControl);
+ //if (!init_trip->is_Con()) return NULL; // avoid rolling over MAXINT/MININT
+
+ // If iv trunc type is smaller than int, check for possible wrap.
+ if (!TypeInt::INT->higher_equal(iv_trunc_t)) {
+ assert(trunc1 != NULL, "must have found some truncation");
+
+ // Get a better type for the phi (filtered thru if's)
+ const TypeInt* phi_ft = filtered_type(phi);
+
+ // Can iv take on a value that will wrap?
+ //
+ // Ensure iv's limit is not within "stride" of the wrap value.
+ //
+ // Example for "short" type
+ // Truncation ensures value is in the range -32768..32767 (iv_trunc_t)
+ // If the stride is +10, then the last value of the induction
+ // variable before the increment (phi_ft->_hi) must be
+ // <= 32767 - 10 and (phi_ft->_lo) must be >= -32768 to
+ // ensure no truncation occurs after the increment.
+
+ if (stride_con > 0) {
+ if (iv_trunc_t->_hi - phi_ft->_hi < stride_con ||
+ iv_trunc_t->_lo > phi_ft->_lo) {
+ return NULL; // truncation may occur
+ }
+ } else if (stride_con < 0) {
+ if (iv_trunc_t->_lo - phi_ft->_lo > stride_con ||
+ iv_trunc_t->_hi < phi_ft->_hi) {
+ return NULL; // truncation may occur
+ }
+ }
+ // No possibility of wrap so truncation can be discarded
+ // Promote iv type to Int
+ } else {
+ assert(trunc1 == NULL && trunc2 == NULL, "no truncation for int");
+ }
+
+ // =================================================
+ // ---- SUCCESS! Found A Trip-Counted Loop! -----
+ //
+ // Canonicalize the condition on the test. If we can exactly determine
+ // the trip-counter exit value, then set limit to that value and use
+ // a '!=' test. Otherwise use condition '<' for count-up loops and
+ // '>' for count-down loops. If the condition is inverted and we will
+ // be rolling through MININT to MAXINT, then bail out.
+
+ C->print_method("Before CountedLoop", 3);
+
+ // Check for SafePoint on backedge and remove
+ Node *sfpt = x->in(LoopNode::LoopBackControl);
+ if( sfpt->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt)) {
+ lazy_replace( sfpt, iftrue );
+ loop->_tail = iftrue;
+ }
+
+
+ // If compare points to incr, we are ok. Otherwise the compare
+ // can directly point to the phi; in this case adjust the compare so that
+ // it points to the incr by adjusting the limit.
+ if( cmp->in(1) == phi || cmp->in(2) == phi )
+ limit = gvn->transform(new (C, 3) AddINode(limit,stride));
+
+ // The trip count for a positive or negative stride should be: (limit - init_trip + stride - 1)/stride.
+ // The final value of the iterator should be: trip_count * stride + init_trip.
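+ // (Illustrative numbers: init_trip = 0, limit = 10, stride = 3 gives
+ // trip_count = (10 - 0 + 3 - 1)/3 = 4 and a final value of 4*3 + 0 = 12.)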
+ const Type *limit_t = limit->bottom_type();
+ const Type *init_t = init_trip->bottom_type();
+ Node *one_p = gvn->intcon( 1);
+ Node *one_m = gvn->intcon(-1);
+
+ Node *trip_count = NULL;
+ Node *hook = new (C, 6) Node(6);
+ switch( bt ) {
+ case BoolTest::eq:
+ return NULL; // Bail out, but this loop trips at most twice!
+ case BoolTest::ne: // Ahh, the case we desire
+ if( stride_con == 1 )
+ trip_count = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ else if( stride_con == -1 )
+ trip_count = gvn->transform(new (C, 3) SubINode(init_trip,limit));
+ else
+ return NULL; // Odd stride; must prove we hit limit exactly
+ set_subtree_ctrl( trip_count );
+ //_loop.map(trip_count->_idx,loop(limit));
+ break;
+ case BoolTest::le: // Maybe convert to '<' case
+ limit = gvn->transform(new (C, 3) AddINode(limit,one_p));
+ set_subtree_ctrl( limit );
+ hook->init_req(4, limit);
+
+ bt = BoolTest::lt;
+ // Make the new limit be in the same loop nest as the old limit
+ //_loop.map(limit->_idx,limit_loop);
+ // Fall into next case
+ case BoolTest::lt: { // Maybe convert to '!=' case
+ if( stride_con < 0 ) return NULL; // Count down loop rolls through MAXINT
+ Node *range = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ set_subtree_ctrl( range );
+ hook->init_req(0, range);
+
+ Node *bias = gvn->transform(new (C, 3) AddINode(range,stride));
+ set_subtree_ctrl( bias );
+ hook->init_req(1, bias);
+
+ Node *bias1 = gvn->transform(new (C, 3) AddINode(bias,one_m));
+ set_subtree_ctrl( bias1 );
+ hook->init_req(2, bias1);
+
+ trip_count = gvn->transform(new (C, 3) DivINode(0,bias1,stride));
+ set_subtree_ctrl( trip_count );
+ hook->init_req(3, trip_count);
+ break;
+ }
+
+ case BoolTest::ge: // Maybe convert to '>' case
+ limit = gvn->transform(new (C, 3) AddINode(limit,one_m));
+ set_subtree_ctrl( limit );
+ hook->init_req(4 ,limit);
+
+ bt = BoolTest::gt;
+ // Make the new limit be in the same loop nest as the old limit
+ //_loop.map(limit->_idx,limit_loop);
+ // Fall into next case
+ case BoolTest::gt: { // Maybe convert to '!=' case
+ if( stride_con > 0 ) return NULL; // count up loop rolls through MININT
+ Node *range = gvn->transform(new (C, 3) SubINode(limit,init_trip));
+ set_subtree_ctrl( range );
+ hook->init_req(0, range);
+
+ Node *bias = gvn->transform(new (C, 3) AddINode(range,stride));
+ set_subtree_ctrl( bias );
+ hook->init_req(1, bias);
+
+ Node *bias1 = gvn->transform(new (C, 3) AddINode(bias,one_p));
+ set_subtree_ctrl( bias1 );
+ hook->init_req(2, bias1);
+
+ trip_count = gvn->transform(new (C, 3) DivINode(0,bias1,stride));
+ set_subtree_ctrl( trip_count );
+ hook->init_req(3, trip_count);
+ break;
+ }
+ }
+
+ Node *span = gvn->transform(new (C, 3) MulINode(trip_count,stride));
+ set_subtree_ctrl( span );
+ hook->init_req(5, span);
+
+ limit = gvn->transform(new (C, 3) AddINode(span,init_trip));
+ set_subtree_ctrl( limit );
+
+ // Build a canonical trip test.
+ // Clone code, as old values may be in use.
+ incr = incr->clone();
+ incr->set_req(1,phi);
+ incr->set_req(2,stride);
+ incr = _igvn.register_new_node_with_optimizer(incr);
+ set_early_ctrl( incr );
+ _igvn.hash_delete(phi);
+ phi->set_req_X( LoopNode::LoopBackControl, incr, &_igvn );
+
+ // If phi type is more restrictive than Int, raise to
+ // Int to prevent (almost) infinite recursion in igvn
+ // which can only handle integer types for constants or minint..maxint.
+ if (!TypeInt::INT->higher_equal(phi->bottom_type())) {
+ Node* nphi = PhiNode::make(phi->in(0), phi->in(LoopNode::EntryControl), TypeInt::INT);
+ nphi->set_req(LoopNode::LoopBackControl, phi->in(LoopNode::LoopBackControl));
+ nphi = _igvn.register_new_node_with_optimizer(nphi);
+ set_ctrl(nphi, get_ctrl(phi));
+ _igvn.subsume_node(phi, nphi);
+ phi = nphi->as_Phi();
+ }
+ cmp = cmp->clone();
+ cmp->set_req(1,incr);
+ cmp->set_req(2,limit);
+ cmp = _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, iff->in(0));
+
+ Node *tmp = test->clone();
+ assert( tmp->is_Bool(), "" );
+ test = (BoolNode*)tmp;
+ (*(BoolTest*)&test->_test)._test = bt; //BoolTest::ne;
+ test->set_req(1,cmp);
+ _igvn.register_new_node_with_optimizer(test);
+ set_ctrl(test, iff->in(0));
+ // If the exit test is dead, STOP!
+ if( test == NULL ) return NULL;
+ _igvn.hash_delete(iff);
+ iff->set_req_X( 1, test, &_igvn );
+
+ // Replace the old IfNode with a new LoopEndNode
+ Node *lex = _igvn.register_new_node_with_optimizer(new (C, 2) CountedLoopEndNode( iff->in(0), iff->in(1), cl_prob, iff->as_If()->_fcnt ));
+ IfNode *le = lex->as_If();
+ uint dd = dom_depth(iff);
+ set_idom(le, le->in(0), dd); // Update dominance for loop exit
+ set_loop(le, loop);
+
+ // Get the loop-exit control
+ Node *if_f = iff->as_If()->proj_out(!(iftrue_op == Op_IfTrue));
+
+ // Need to swap loop-exit and loop-back control?
+ if( iftrue_op == Op_IfFalse ) {
+ Node *ift2=_igvn.register_new_node_with_optimizer(new (C, 1) IfTrueNode (le));
+ Node *iff2=_igvn.register_new_node_with_optimizer(new (C, 1) IfFalseNode(le));
+
+ loop->_tail = back_control = ift2;
+ set_loop(ift2, loop);
+ set_loop(iff2, get_loop(if_f));
+
+ // Lazy update of 'get_ctrl' mechanism.
+ lazy_replace_proj( if_f , iff2 );
+ lazy_replace_proj( iftrue, ift2 );
+
+ // Swap names
+ if_f = iff2;
+ iftrue = ift2;
+ } else {
+ _igvn.hash_delete(if_f );
+ _igvn.hash_delete(iftrue);
+ if_f ->set_req_X( 0, le, &_igvn );
+ iftrue->set_req_X( 0, le, &_igvn );
+ }
+
+ set_idom(iftrue, le, dd+1);
+ set_idom(if_f, le, dd+1);
+
+ // Now setup a new CountedLoopNode to replace the existing LoopNode
+ CountedLoopNode *l = new (C, 3) CountedLoopNode(init_control, back_control);
+ // The following assert is approximately true, and defines the intention
+ // of can_be_counted_loop. It fails, however, because phase->type
+ // is not yet initialized for this loop and its parts.
+ //assert(l->can_be_counted_loop(this), "sanity");
+ _igvn.register_new_node_with_optimizer(l);
+ set_loop(l, loop);
+ loop->_head = l;
+ // Fix all data nodes placed at the old loop head.
+ // Uses the lazy-update mechanism of 'get_ctrl'.
+ lazy_replace( x, l );
+ set_idom(l, init_control, dom_depth(x));
+
+ // Check for immediately preceding SafePoint and remove
+ Node *sfpt2 = le->in(0);
+ if( sfpt2->Opcode() == Op_SafePoint && is_deleteable_safept(sfpt2))
+ lazy_replace( sfpt2, sfpt2->in(TypeFunc::Control));
+
+ // Free up intermediate goo
+ _igvn.remove_dead_node(hook);
+
+ C->print_method("After CountedLoop", 3);
+
+ // Return trip counter
+ return trip_count;
+}
+
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Attempt to convert into a counted-loop.
+Node *LoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (!can_be_counted_loop(phase)) {
+ phase->C->set_major_progress();
+ }
+ return RegionNode::Ideal(phase, can_reshape);
+}
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node.
+// Attempt to convert into a counted-loop.
+Node *CountedLoopNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return RegionNode::Ideal(phase, can_reshape);
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void CountedLoopNode::dump_spec(outputStream *st) const {
+ LoopNode::dump_spec(st);
+ if( stride_is_con() ) {
+ st->print("stride: %d ",stride_con());
+ } else {
+ st->print("stride: not constant ");
+ }
+ if( is_pre_loop () ) st->print("pre of N%d" , _main_idx );
+ if( is_main_loop() ) st->print("main of N%d", _idx );
+ if( is_post_loop() ) st->print("post of N%d", _main_idx );
+}
+#endif
+
+//=============================================================================
+int CountedLoopEndNode::stride_con() const {
+ return stride()->bottom_type()->is_int()->get_con();
+}
+
+
+//----------------------match_incr_with_optional_truncation--------------------
+// Match increment with optional truncation:
+// CHAR: (i+1)&0x7fff, BYTE: ((i+1)<<8)>>8, or SHORT: ((i+1)<<16)>>16
+// Return NULL for failure. Success returns the increment node.
+Node* CountedLoopNode::match_incr_with_optional_truncation(
+ Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type) {
+ // Quick cutouts:
+ if (expr == NULL || expr->req() != 3) return NULL;
+
+ Node *t1 = NULL;
+ Node *t2 = NULL;
+ const TypeInt* trunc_t = TypeInt::INT;
+ Node* n1 = expr;
+ int n1op = n1->Opcode();
+
+ // Try to strip (n1 & M) or (n1 << N >> N) from n1.
+ if (n1op == Op_AndI &&
+ n1->in(2)->is_Con() &&
+ n1->in(2)->bottom_type()->is_int()->get_con() == 0x7fff) {
+ // %%% This check should match any mask of 2**K-1.
+ t1 = n1;
+ n1 = t1->in(1);
+ n1op = n1->Opcode();
+ trunc_t = TypeInt::CHAR;
+ } else if (n1op == Op_RShiftI &&
+ n1->in(1) != NULL &&
+ n1->in(1)->Opcode() == Op_LShiftI &&
+ n1->in(2) == n1->in(1)->in(2) &&
+ n1->in(2)->is_Con()) {
+ jint shift = n1->in(2)->bottom_type()->is_int()->get_con();
+ // %%% This check should match any shift in [1..31].
+ if (shift == 16 || shift == 8) {
+ t1 = n1;
+ t2 = t1->in(1);
+ n1 = t2->in(1);
+ n1op = n1->Opcode();
+ if (shift == 16) {
+ trunc_t = TypeInt::SHORT;
+ } else if (shift == 8) {
+ trunc_t = TypeInt::BYTE;
+ }
+ }
+ }
+
+ // If (maybe after stripping) it is an AddI, we won:
+ if (n1op == Op_AddI) {
+ *trunc1 = t1;
+ *trunc2 = t2;
+ *trunc_type = trunc_t;
+ return n1;
+ }
+
+ // failed
+ return NULL;
+}
+
+
+//------------------------------filtered_type--------------------------------
+// Return a type based on condition control flow
+// A successful return will be a type that is restricted due
+// to a series of dominating if-tests, such as:
+// if (i < 10) {
+// if (i > 0) {
+// here: "i" type is [1..10)
+// }
+// }
+// or a control flow merge
+// if (i < 10) {
+// do {
+// phi( , ) -- at top of loop type is [min_int..10)
+// i = ?
+// } while ( i < 10)
+//
+const TypeInt* PhaseIdealLoop::filtered_type( Node *n, Node* n_ctrl) {
+ assert(n && n->bottom_type()->is_int(), "must be int");
+ const TypeInt* filtered_t = NULL;
+ if (!n->is_Phi()) {
+ assert(n_ctrl != NULL || n_ctrl == C->top(), "valid control");
+ filtered_t = filtered_type_from_dominators(n, n_ctrl);
+
+ } else {
+ Node* phi = n->as_Phi();
+ Node* region = phi->in(0);
+ assert(n_ctrl == NULL || n_ctrl == region, "ctrl parameter must be region");
+ if (region && region != C->top()) {
+ for (uint i = 1; i < phi->req(); i++) {
+ Node* val = phi->in(i);
+ Node* use_c = region->in(i);
+ const TypeInt* val_t = filtered_type_from_dominators(val, use_c);
+ if (val_t != NULL) {
+ if (filtered_t == NULL) {
+ filtered_t = val_t;
+ } else {
+ filtered_t = filtered_t->meet(val_t)->is_int();
+ }
+ }
+ }
+ }
+ }
+ const TypeInt* n_t = _igvn.type(n)->is_int();
+ if (filtered_t != NULL) {
+ n_t = n_t->join(filtered_t)->is_int();
+ }
+ return n_t;
+}
+
+
+//------------------------------filtered_type_from_dominators--------------------------------
+// Return a possibly more restrictive type for val based on condition control flow of dominators
+const TypeInt* PhaseIdealLoop::filtered_type_from_dominators( Node* val, Node *use_ctrl) {
+ if (val->is_Con()) {
+ return val->bottom_type()->is_int();
+ }
+ uint if_limit = 10; // Max number of dominating if's visited
+ const TypeInt* rtn_t = NULL;
+
+ if (use_ctrl && use_ctrl != C->top()) {
+ Node* val_ctrl = get_ctrl(val);
+ uint val_dom_depth = dom_depth(val_ctrl);
+ Node* pred = use_ctrl;
+ uint if_cnt = 0;
+ while (if_cnt < if_limit) {
+ if ((pred->Opcode() == Op_IfTrue || pred->Opcode() == Op_IfFalse)) {
+ if_cnt++;
+ const TypeInt* if_t = filtered_type_at_if(val, pred);
+ if (if_t != NULL) {
+ if (rtn_t == NULL) {
+ rtn_t = if_t;
+ } else {
+ rtn_t = rtn_t->join(if_t)->is_int();
+ }
+ }
+ }
+ pred = idom(pred);
+ if (pred == NULL || pred == C->top()) {
+ break;
+ }
+ // Stop if going beyond definition block of val
+ if (dom_depth(pred) < val_dom_depth) {
+ break;
+ }
+ }
+ }
+ return rtn_t;
+}
+
+
+//------------------------------filtered_type_at_if--------------------------------
+// Return a possibly more restrictive type for val based on condition control flow for an if
+const TypeInt* PhaseIdealLoop::filtered_type_at_if( Node* val, Node *if_proj) {
+ assert(if_proj &&
+ (if_proj->Opcode() == Op_IfTrue || if_proj->Opcode() == Op_IfFalse), "expecting an if projection");
+ if (if_proj->in(0) && if_proj->in(0)->is_If()) {
+ IfNode* iff = if_proj->in(0)->as_If();
+ if (iff->in(1) && iff->in(1)->is_Bool()) {
+ BoolNode* bol = iff->in(1)->as_Bool();
+ if (bol->in(1) && bol->in(1)->is_Cmp()) {
+ const CmpNode* cmp = bol->in(1)->as_Cmp();
+ if (cmp->in(1) == val) {
+ const TypeInt* cmp2_t = _igvn.type(cmp->in(2))->isa_int();
+ if (cmp2_t != NULL) {
+ jint lo = cmp2_t->_lo;
+ jint hi = cmp2_t->_hi;
+ BoolTest::mask msk = if_proj->Opcode() == Op_IfTrue ? bol->_test._test : bol->_test.negate();
+ switch (msk) {
+ case BoolTest::ne:
+ // Can't refine type
+ return NULL;
+ case BoolTest::eq:
+ return cmp2_t;
+ case BoolTest::lt:
+ lo = TypeInt::INT->_lo;
+ if (hi - 1 < hi) {
+ hi = hi - 1;
+ }
+ break;
+ case BoolTest::le:
+ lo = TypeInt::INT->_lo;
+ break;
+ case BoolTest::gt:
+ if (lo + 1 > lo) {
+ lo = lo + 1;
+ }
+ hi = TypeInt::INT->_hi;
+ break;
+ case BoolTest::ge:
+ // lo unchanged
+ hi = TypeInt::INT->_hi;
+ break;
+ }
+ const TypeInt* rtn_t = TypeInt::make(lo, hi, cmp2_t->_widen);
+ return rtn_t;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void CountedLoopEndNode::dump_spec(outputStream *st) const {
+ if( in(TestValue)->is_Bool() ) {
+ BoolTest bt( test_trip()); // Added this for g++.
+
+ st->print("[");
+ bt.dump_on(st);
+ st->print("]");
+ }
+ st->print(" ");
+ IfNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+//------------------------------is_member--------------------------------------
+// Is 'l' a member of 'this'?
+int IdealLoopTree::is_member( const IdealLoopTree *l ) const {
+ while( l->_nest > _nest ) l = l->_parent;
+ return l == this;
+}
+
+//------------------------------set_nest---------------------------------------
+// Set loop tree nesting depth. Accumulate _has_call bits.
+int IdealLoopTree::set_nest( uint depth ) {
+ _nest = depth;
+ int bits = _has_call;
+ if( _child ) bits |= _child->set_nest(depth+1);
+ if( bits ) _has_call = 1;
+ if( _next ) bits |= _next ->set_nest(depth );
+ return bits;
+}
+
+//------------------------------split_fall_in----------------------------------
+// Split out multiple fall-in edges from the loop header. Move them to a
+// private RegionNode before the loop. This becomes the loop landing pad.
+void IdealLoopTree::split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt ) {
+ PhaseIterGVN &igvn = phase->_igvn;
+ uint i;
+
+ // Make a new RegionNode to be the landing pad.
+ Node *landing_pad = new (phase->C, fall_in_cnt+1) RegionNode( fall_in_cnt+1 );
+ phase->set_loop(landing_pad,_parent);
+ // Gather all the fall-in control paths into the landing pad
+ uint icnt = fall_in_cnt;
+ uint oreq = _head->req();
+ for( i = oreq-1; i>0; i-- )
+ if( !phase->is_member( this, _head->in(i) ) )
+ landing_pad->set_req(icnt--,_head->in(i));
+
+ // Peel off PhiNode edges as well
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *oj = _head->fast_out(j);
+ if( oj->is_Phi() ) {
+ PhiNode* old_phi = oj->as_Phi();
+ assert( old_phi->region() == _head, "" );
+ igvn.hash_delete(old_phi); // Yank from hash before hacking edges
+ Node *p = PhiNode::make_blank(landing_pad, old_phi);
+ uint icnt = fall_in_cnt;
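+ // Fill the landing-pad phi inputs from the highest slot downward, mirroring the order used for the landing pad region above.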
+ for( i = oreq-1; i>0; i-- ) {
+ if( !phase->is_member( this, _head->in(i) ) ) {
+ p->init_req(icnt--, old_phi->in(i));
+ // Go ahead and clean out old edges from old phi
+ old_phi->del_req(i);
+ }
+ }
+ // Search for CSE's here, because ZKM.jar does a lot of
+ // loop hackery and we need to be a little incremental
+ // with the CSE to avoid O(N^2) node blow-up.
+ Node *p2 = igvn.hash_find_insert(p); // Look for a CSE
+ if( p2 ) { // Found CSE
+ p->destruct(); // Recover useless new node
+ p = p2; // Use old node
+ } else {
+ igvn.register_new_node_with_optimizer(p, old_phi);
+ }
+ // Make old Phi refer to new Phi.
+ old_phi->add_req(p);
+ // Check for the special case where the old phi becomes useless and
+ // can simply disappear. In JavaGrande I have a case where this useless
+ // Phi is the loop limit and prevents recognizing a CountedLoop
+ // which in turn prevents removing an empty loop.
+ Node *id_old_phi = old_phi->Identity( &igvn );
+ if( id_old_phi != old_phi ) { // Found a simple identity?
+ // Note that I cannot call 'subsume_node' here, because
+ // that will yank the edge from old_phi to the Region and
+ // I'm mid-iteration over the Region's uses.
+ for (DUIterator_Last imin, i = old_phi->last_outs(imin); i >= imin; ) {
+ Node* use = old_phi->last_out(i);
+ igvn.hash_delete(use);
+ igvn._worklist.push(use);
+ uint uses_found = 0;
+ for (uint j = 0; j < use->len(); j++) {
+ if (use->in(j) == old_phi) {
+ if (j < use->req()) use->set_req (j, id_old_phi);
+ else use->set_prec(j, id_old_phi);
+ uses_found++;
+ }
+ }
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ }
+ igvn._worklist.push(old_phi);
+ }
+ }
+ // Finally clean out the fall-in edges from the RegionNode
+ for( i = oreq-1; i>0; i-- ) {
+ if( !phase->is_member( this, _head->in(i) ) ) {
+ _head->del_req(i);
+ }
+ }
+ // Transform landing pad
+ igvn.register_new_node_with_optimizer(landing_pad, _head);
+ // Insert landing pad into the header
+ _head->add_req(landing_pad);
+}
+
+//------------------------------split_outer_loop-------------------------------
+// Split out the outermost loop from this shared header.
+void IdealLoopTree::split_outer_loop( PhaseIdealLoop *phase ) {
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ // Find index of outermost loop; it should also be my tail.
+ uint outer_idx = 1;
+ while( _head->in(outer_idx) != _tail ) outer_idx++;
+
+ // Make a LoopNode for the outermost loop.
+ Node *ctl = _head->in(LoopNode::EntryControl);
+ Node *outer = new (phase->C, 3) LoopNode( ctl, _head->in(outer_idx) );
+ outer = igvn.register_new_node_with_optimizer(outer, _head);
+ phase->set_created_loop_node();
+ // Outermost loop falls into '_head' loop
+ _head->set_req(LoopNode::EntryControl, outer);
+ _head->del_req(outer_idx);
+ // Split all the Phis up between '_head' loop and 'outer' loop.
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *out = _head->fast_out(j);
+ if( out->is_Phi() ) {
+ PhiNode *old_phi = out->as_Phi();
+ assert( old_phi->region() == _head, "" );
+ Node *phi = PhiNode::make_blank(outer, old_phi);
+ phi->init_req(LoopNode::EntryControl, old_phi->in(LoopNode::EntryControl));
+ phi->init_req(LoopNode::LoopBackControl, old_phi->in(outer_idx));
+ phi = igvn.register_new_node_with_optimizer(phi, old_phi);
+ // Make old Phi point to new Phi on the fall-in path
+ igvn.hash_delete(old_phi);
+ old_phi->set_req(LoopNode::EntryControl, phi);
+ old_phi->del_req(outer_idx);
+ igvn._worklist.push(old_phi);
+ }
+ }
+
+ // Use the new loop head instead of the old shared one
+ _head = outer;
+ phase->set_loop(_head, this);
+}
+
+//------------------------------fix_parent-------------------------------------
+static void fix_parent( IdealLoopTree *loop, IdealLoopTree *parent ) {
+ loop->_parent = parent;
+ if( loop->_child ) fix_parent( loop->_child, loop );
+ if( loop->_next ) fix_parent( loop->_next , parent );
+}
+
+//------------------------------estimate_path_freq-----------------------------
+static float estimate_path_freq( Node *n ) {
+ // Try to extract some path frequency info
+ IfNode *iff;
+ for( int i = 0; i < 50; i++ ) { // Skip through a bunch of uncommon tests
+ uint nop = n->Opcode();
+ if( nop == Op_SafePoint ) { // Skip any safepoint
+ n = n->in(0);
+ continue;
+ }
+ if( nop == Op_CatchProj ) { // Get count from a prior call
+ // Assume the call does not always throw exceptions: this means the call-site
+ // count is also the frequency of the fall-through path.
+ assert( n->is_CatchProj(), "" );
+ if( ((CatchProjNode*)n)->_con != CatchProjNode::fall_through_index )
+ return 0.0f; // Assume call exception path is rare
+ Node *call = n->in(0)->in(0)->in(0);
+ assert( call->is_Call(), "expect a call here" );
+ const JVMState *jvms = ((CallNode*)call)->jvms();
+ ciMethodData* methodData = jvms->method()->method_data();
+ if (!methodData->is_mature()) return 0.0f; // No call-site data
+ ciProfileData* data = methodData->bci_to_data(jvms->bci());
+ if ((data == NULL) || !data->is_CounterData()) {
+ // no call profile available, try call's control input
+ n = n->in(0);
+ continue;
+ }
+ return data->as_CounterData()->count()/FreqCountInvocations;
+ }
+ // See if there's a gating IF test
+ Node *n_c = n->in(0);
+ if( !n_c->is_If() ) break; // No estimate available
+ iff = n_c->as_If();
+ if( iff->_fcnt != COUNT_UNKNOWN ) // Have a valid count?
+ // Compute how much count comes on this path
+ return ((nop == Op_IfTrue) ? iff->_prob : 1.0f - iff->_prob) * iff->_fcnt;
+ // Have no count info. Skip dull uncommon-trap like branches.
+ if( (nop == Op_IfTrue && iff->_prob < PROB_LIKELY_MAG(5)) ||
+ (nop == Op_IfFalse && iff->_prob > PROB_UNLIKELY_MAG(5)) )
+ break;
+ // Skip through never-taken branch; look for a real loop exit.
+ n = iff->in(0);
+ }
+ return 0.0f; // No estimate available
+}
+
+//------------------------------merge_many_backedges---------------------------
+// Merge all the backedges from the shared header into a private Region.
+// Feed that region as the one backedge to this loop.
+void IdealLoopTree::merge_many_backedges( PhaseIdealLoop *phase ) {
+ uint i;
+
+ // Scan for the top 2 hottest backedges
+ float hotcnt = 0.0f;
+ float warmcnt = 0.0f;
+ uint hot_idx = 0;
+ // Loop starts at 2 because slot 1 is the fall-in path
+ for( i = 2; i < _head->req(); i++ ) {
+ float cnt = estimate_path_freq(_head->in(i));
+ if( cnt > hotcnt ) { // Grab hottest path
+ warmcnt = hotcnt;
+ hotcnt = cnt;
+ hot_idx = i;
+ } else if( cnt > warmcnt ) { // And 2nd hottest path
+ warmcnt = cnt;
+ }
+ }
+
+ // See if the hottest backedge is worthy of being an inner loop
+ // by being much hotter than the next hottest backedge.
+ if( hotcnt <= 0.0001 ||
+ hotcnt < 2.0*warmcnt ) hot_idx = 0;// No hot backedge
+
+ // Peel out the backedges into a private merge point; peel
+ // them all except optionally hot_idx.
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ Node *hot_tail = NULL;
+ // Make a Region for the merge point
+ Node *r = new (phase->C, 1) RegionNode(1);
+ for( i = 2; i < _head->req(); i++ ) {
+ if( i != hot_idx )
+ r->add_req( _head->in(i) );
+ else hot_tail = _head->in(i);
+ }
+ igvn.register_new_node_with_optimizer(r, _head);
+ // Plug region into end of loop _head, followed by hot_tail
+ while( _head->req() > 3 ) _head->del_req( _head->req()-1 );
+ _head->set_req(2, r);
+ if( hot_idx ) _head->add_req(hot_tail);
+
+ // Split all the Phis up between '_head' loop and the Region 'r'
+ for (DUIterator_Fast jmax, j = _head->fast_outs(jmax); j < jmax; j++) {
+ Node *out = _head->fast_out(j);
+ if( out->is_Phi() ) {
+ PhiNode* n = out->as_Phi();
+ igvn.hash_delete(n); // Delete from hash before hacking edges
+ Node *hot_phi = NULL;
+ Node *phi = new (phase->C, r->req()) PhiNode(r, n->type(), n->adr_type());
+ // Check all inputs for the ones to peel out
+ uint j = 1;
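+ // 'j' is the next free input slot of the new merge phi; slot 0 is the region.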
+ for( uint i = 2; i < n->req(); i++ ) {
+ if( i != hot_idx )
+ phi->set_req( j++, n->in(i) );
+ else hot_phi = n->in(i);
+ }
+ // Register the phi but do not transform until whole place transforms
+ igvn.register_new_node_with_optimizer(phi, n);
+ // Add the merge phi to the old Phi
+ while( n->req() > 3 ) n->del_req( n->req()-1 );
+ n->set_req(2, phi);
+ if( hot_idx ) n->add_req(hot_phi);
+ }
+ }
+
+
+ // Insert a new IdealLoopTree below me and turn it into a clone
+ // of my loop tree. Turn self into a loop headed by _head and with
+ // tail being the new merge point.
+ IdealLoopTree *ilt = new IdealLoopTree( phase, _head, _tail );
+ phase->set_loop(_tail,ilt); // Adjust tail
+ _tail = r; // Self's tail is new merge point
+ phase->set_loop(r,this);
+ ilt->_child = _child; // New guy has my children
+ _child = ilt; // Self has new guy as only child
+ ilt->_parent = this; // new guy has self for parent
+ ilt->_nest = _nest; // Same nesting depth (for now)
+
+ // Starting with 'ilt', look for child loop trees using the same shared
+ // header. Flatten these out; they will no longer be loops in the end.
+ IdealLoopTree **pilt = &_child;
+ while( ilt ) {
+ if( ilt->_head == _head ) {
+ uint i;
+ for( i = 2; i < _head->req(); i++ )
+ if( _head->in(i) == ilt->_tail )
+ break; // Still a loop
+ if( i == _head->req() ) { // No longer a loop
+ // Flatten ilt. Hang ilt's "_next" list from the end of
+ // ilt's '_child' list. Move the ilt's _child up to replace ilt.
+ IdealLoopTree **cp = &ilt->_child;
+ while( *cp ) cp = &(*cp)->_next; // Find end of child list
+ *cp = ilt->_next; // Hang next list at end of child list
+ *pilt = ilt->_child; // Move child up to replace ilt
+ ilt->_head = NULL; // Flag as a loop UNIONED into parent
+ ilt = ilt->_child; // Repeat using new ilt
+ continue; // do not advance over ilt->_child
+ }
+ assert( ilt->_tail == hot_tail, "expected to only find the hot inner loop here" );
+ phase->set_loop(_head,ilt);
+ }
+ pilt = &ilt->_child; // Advance to next
+ ilt = *pilt;
+ }
+
+ if( _child ) fix_parent( _child, this );
+}
+
+//------------------------------beautify_loops---------------------------------
+// Split shared headers and insert loop landing pads.
+// Insert a LoopNode to replace the RegionNode.
+// Return TRUE if loop tree is structurally changed.
+bool IdealLoopTree::beautify_loops( PhaseIdealLoop *phase ) {
+ bool result = false;
+ // Cache parts in locals for easy access
+ PhaseIterGVN &igvn = phase->_igvn;
+
+ phase->C->print_method("Before beautify loops", 3);
+
+ igvn.hash_delete(_head); // Yank from hash before hacking edges
+
+ // Check for multiple fall-in paths. Peel off a landing pad if need be.
+ int fall_in_cnt = 0;
+ for( uint i = 1; i < _head->req(); i++ )
+ if( !phase->is_member( this, _head->in(i) ) )
+ fall_in_cnt++;
+ assert( fall_in_cnt, "at least 1 fall-in path" );
+ if( fall_in_cnt > 1 ) // Need a loop landing pad to merge fall-ins
+ split_fall_in( phase, fall_in_cnt );
+
+ // Swap inputs to the _head and all Phis to move the fall-in edge to
+ // the left.
+ fall_in_cnt = 1;
+ while( phase->is_member( this, _head->in(fall_in_cnt) ) )
+ fall_in_cnt++;
+ if( fall_in_cnt > 1 ) {
+ // Since I am just swapping inputs I do not need to update def-use info
+ Node *tmp = _head->in(1);
+ _head->set_req( 1, _head->in(fall_in_cnt) );
+ _head->set_req( fall_in_cnt, tmp );
+ // Swap also all Phis
+ for (DUIterator_Fast imax, i = _head->fast_outs(imax); i < imax; i++) {
+ Node* phi = _head->fast_out(i);
+ if( phi->is_Phi() ) {
+ igvn.hash_delete(phi); // Yank from hash before hacking edges
+ tmp = phi->in(1);
+ phi->set_req( 1, phi->in(fall_in_cnt) );
+ phi->set_req( fall_in_cnt, tmp );
+ }
+ }
+ }
+ assert( !phase->is_member( this, _head->in(1) ), "left edge is fall-in" );
+ assert( phase->is_member( this, _head->in(2) ), "right edge is loop" );
+
+ // If I am a shared header (multiple backedges), peel off the many
+ // backedges into a private merge point and use the merge point as
+ // the one true backedge.
+ if( _head->req() > 3 ) {
+ // Merge the many backedges into a single backedge.
+ merge_many_backedges( phase );
+ result = true;
+ }
+
+ // If I am still a shared header (multiple backedges), peel off my own loop.
+ // I had better be the outermost loop.
+ if( _head->req() > 3 ) {
+ split_outer_loop( phase );
+ result = true;
+
+ } else if( !_head->is_Loop() && !_irreducible ) {
+ // Make a new LoopNode to replace the old loop head
+ Node *l = new (phase->C, 3) LoopNode( _head->in(1), _head->in(2) );
+ l = igvn.register_new_node_with_optimizer(l, _head);
+ phase->set_created_loop_node();
+ // Go ahead and replace _head
+ phase->_igvn.subsume_node( _head, l );
+ _head = l;
+ phase->set_loop(_head, this);
+ for (DUIterator_Fast imax, i = l->fast_outs(imax); i < imax; i++)
+ phase->_igvn.add_users_to_worklist(l->fast_out(i));
+ }
+
+ phase->C->print_method("After beautify loops", 3);
+
+ // Now recursively beautify nested loops
+ if( _child ) result |= _child->beautify_loops( phase );
+ if( _next ) result |= _next ->beautify_loops( phase );
+ return result;
+}
+
+//------------------------------allpaths_check_safepts----------------------------
+// Allpaths backwards scan from loop tail, terminating each path at first safepoint
+// encountered. Helper for check_safepts.
+void IdealLoopTree::allpaths_check_safepts(VectorSet &visited, Node_List &stack) {
+ assert(stack.size() == 0, "empty stack");
+ stack.push(_tail);
+ visited.Clear();
+ visited.set(_tail->_idx);
+ while (stack.size() > 0) {
+ Node* n = stack.pop();
+ if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
+ // Terminate this path
+ } else if (n->Opcode() == Op_SafePoint) {
+ if (_phase->get_loop(n) != this) {
+ if (_required_safept == NULL) _required_safept = new Node_List();
+ _required_safept->push(n); // save the one closest to the tail
+ }
+ // Terminate this path
+ } else {
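+ // For a Region walk all control inputs, except do not follow a Loop's backedge; for other nodes just walk in(0).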
+ uint start = n->is_Region() ? 1 : 0;
+ uint end = n->is_Region() && !n->is_Loop() ? n->req() : start + 1;
+ for (uint i = start; i < end; i++) {
+ Node* in = n->in(i);
+ assert(in->is_CFG(), "must be");
+ if (!visited.test_set(in->_idx) && is_member(_phase->get_loop(in))) {
+ stack.push(in);
+ }
+ }
+ }
+ }
+}
+
+//------------------------------check_safepts----------------------------
+// Given dominators, try to find loops with calls that must always be
+// executed (call dominates loop tail). These loops do not need non-call
+// safepoints (ncsfpt).
+//
+// A complication is that a safepoint in an inner loop may be needed
+// by an outer loop. In the following, the inner loop sees it has a
+// call (block 3) on every path from the head (block 2) to the
+// backedge (arc 3->2). So it deletes the ncsfpt (non-call safepoint)
+// in block 2, _but_ this leaves the outer loop without a safepoint.
+//
+// entry 0
+// |
+// v
+// outer 1,2 +->1
+// | |
+// | v
+// | 2<---+ ncsfpt in 2
+// |_/|\ |
+// | v |
+// inner 2,3 / 3 | call in 3
+// / | |
+// v +--+
+// exit 4
+//
+//
+// This method creates, for each loop, a list (_required_safept) of ncsfpt
+// nodes that must be preserved. When a ncsfpt may be deleted, it is first
+// looked for in the lists of the outer loops of the current loop.
+//
+// The insights into the problem:
+// A) counted loops are okay
+// B) innermost loops are okay (only an inner loop can delete
+// a ncsfpt needed by an outer loop)
+// C) a loop is immune from an inner loop deleting a safepoint
+// if the loop has a call on the idom-path
+// D) a loop is also immune if it has a ncsfpt (non-call safepoint) on the
+// idom-path that is not in a nested loop
+// E) otherwise, an ncsfpt on the idom-path that is nested in an inner
+// loop needs to be prevented from deletion by an inner loop
+//
+// There are two analyses:
+// 1) The first, and cheaper one, scans the loop body from
+// tail to head following the idom (immediate dominator)
+// chain, looking for the cases (C,D,E) above.
+// Since inner loops are scanned before outer loops, there is summary
+// information about inner loops. Inner loops can be skipped over
+// when the tail of an inner loop is encountered.
+//
+// 2) The second, invoked if the first fails to find a call or ncsfpt on
+// the idom path (which is rare), scans all predecessor control paths
+// from the tail to the head, terminating a path when a call or sfpt
+// is encountered, to find the ncsfpt's that are closest to the tail.
+//
+void IdealLoopTree::check_safepts(VectorSet &visited, Node_List &stack) {
+ // Bottom up traversal
+ IdealLoopTree* ch = _child;
+ while (ch != NULL) {
+ ch->check_safepts(visited, stack);
+ ch = ch->_next;
+ }
+
+ if (!_head->is_CountedLoop() && !_has_sfpt && _parent != NULL && !_irreducible) {
+ bool has_call = false; // call on dom-path
+ bool has_local_ncsfpt = false; // ncsfpt on dom-path at this loop depth
+ Node* nonlocal_ncsfpt = NULL; // ncsfpt on dom-path at a deeper depth
+ // Scan the dom-path nodes from tail to head
+ for (Node* n = tail(); n != _head; n = _phase->idom(n)) {
+ if (n->is_Call() && n->as_Call()->guaranteed_safepoint()) {
+ has_call = true;
+ _has_sfpt = 1; // Then no need for a safept!
+ break;
+ } else if (n->Opcode() == Op_SafePoint) {
+ if (_phase->get_loop(n) == this) {
+ has_local_ncsfpt = true;
+ break;
+ }
+ if (nonlocal_ncsfpt == NULL) {
+ nonlocal_ncsfpt = n; // save the one closest to the tail
+ }
+ } else {
+ IdealLoopTree* nlpt = _phase->get_loop(n);
+ if (this != nlpt) {
+ // If at an inner loop tail, see if the inner loop has already
+ // recorded seeing a call on the dom-path (and stop.) If not,
+ // jump to the head of the inner loop.
+ assert(is_member(nlpt), "nested loop");
+ Node* tail = nlpt->_tail;
+ if (tail->in(0)->is_If()) tail = tail->in(0);
+ if (n == tail) {
+ // If inner loop has call on dom-path, so does outer loop
+ if (nlpt->_has_sfpt) {
+ has_call = true;
+ _has_sfpt = 1;
+ break;
+ }
+ // Skip to head of inner loop
+ assert(_phase->is_dominator(_head, nlpt->_head), "inner head dominated by outer head");
+ n = nlpt->_head;
+ }
+ }
+ }
+ }
+ // Record safepts that this loop needs preserved when an
+ // inner loop attempts to delete its safepoints.
+ if (_child != NULL && !has_call && !has_local_ncsfpt) {
+ if (nonlocal_ncsfpt != NULL) {
+ if (_required_safept == NULL) _required_safept = new Node_List();
+ _required_safept->push(nonlocal_ncsfpt);
+ } else {
+ // Failed to find a suitable safept on the dom-path. Now use
+ // an all paths walk from tail to head, looking for safepoints to preserve.
+ allpaths_check_safepts(visited, stack);
+ }
+ }
+ }
+}
+
+//---------------------------is_deleteable_safept----------------------------
+// Is safept not required by an outer loop?
+bool PhaseIdealLoop::is_deleteable_safept(Node* sfpt) {
+ assert(sfpt->Opcode() == Op_SafePoint, "");
+ IdealLoopTree* lp = get_loop(sfpt)->_parent;
+ while (lp != NULL) {
+ Node_List* sfpts = lp->_required_safept;
+ if (sfpts != NULL) {
+ for (uint i = 0; i < sfpts->size(); i++) {
+ if (sfpt == sfpts->at(i))
+ return false;
+ }
+ }
+ lp = lp->_parent;
+ }
+ return true;
+}
+
+//------------------------------counted_loop-----------------------------------
+// Convert to counted loops where possible
+void IdealLoopTree::counted_loop( PhaseIdealLoop *phase ) {
+
+ // For grins, set the inner-loop flag here
+ if( !_child ) {
+ if( _head->is_Loop() ) _head->as_Loop()->set_inner_loop();
+ }
+
+ if( _head->is_CountedLoop() ||
+ phase->is_counted_loop( _head, this ) ) {
+ _has_sfpt = 1; // Indicate we do not need a safepoint here
+
+ // Look for a safepoint to remove
+ for (Node* n = tail(); n != _head; n = phase->idom(n))
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
+ phase->is_deleteable_safept(n))
+ phase->lazy_replace(n,n->in(TypeFunc::Control));
+
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ Node *incr = cl->incr();
+ if( !incr ) return; // Dead loop?
+ Node *init = cl->init_trip();
+ Node *phi = cl->phi();
+ // protect against stride not being a constant
+ if( !cl->stride_is_con() ) return;
+ int stride_con = cl->stride_con();
+
+ // Look for induction variables
+
+ // Visit all children, looking for Phis
+ for (DUIterator i = cl->outs(); cl->has_out(i); i++) {
+ Node *out = cl->out(i);
+ if (!out->is_Phi()) continue; // Looking for phis
+ PhiNode* phi2 = out->as_Phi();
+ Node *incr2 = phi2->in( LoopNode::LoopBackControl );
+ // Look for induction variables of the form: X += constant
+ if( phi2->region() != _head ||
+ incr2->req() != 3 ||
+ incr2->in(1) != phi2 ||
+ incr2 == incr ||
+ incr2->Opcode() != Op_AddI ||
+ !incr2->in(2)->is_Con() )
+ continue;
+
+ // Check for parallel induction variable (parallel to trip counter)
+ // via an affine function. In particular, count-down loops with
+ // count-up array indices are common. We only RCE references off
+ // the trip-counter, so we need to convert all these to trip-counter
+ // expressions.
+ Node *init2 = phi2->in( LoopNode::EntryControl );
+ int stride_con2 = incr2->in(2)->get_int();
+
+ // The general case here gets a little tricky. We want to find the
+ // GCD of all possible parallel IV's and make a new IV using this
+ // GCD for the loop. Then all possible IVs are simple multiples of
+ // the GCD. In practice, this will cover very few extra loops.
+ // Instead we require 'stride_con2' to be a multiple of 'stride_con',
+ // where +/-1 is the common case, but other integer multiples are
+ // also easy to handle.
+ int ratio_con = stride_con2/stride_con;
+
+ if( ratio_con * stride_con == stride_con2 ) { // Check for exact
+ // Convert to using the trip counter. The parallel induction
+ // variable differs from the trip counter by a loop-invariant
+ // amount, the difference between their respective initial values.
+ // It is scaled by the 'ratio_con'.
+ Compile* C = phase->C;
+ Node* ratio = phase->_igvn.intcon(ratio_con);
+ phase->set_ctrl(ratio, C->root());
+ Node* ratio_init = new (C, 3) MulINode(init, ratio);
+ phase->_igvn.register_new_node_with_optimizer(ratio_init, init);
+ phase->set_early_ctrl(ratio_init);
+ Node* diff = new (C, 3) SubINode(init2, ratio_init);
+ phase->_igvn.register_new_node_with_optimizer(diff, init2);
+ phase->set_early_ctrl(diff);
+ Node* ratio_idx = new (C, 3) MulINode(phi, ratio);
+ phase->_igvn.register_new_node_with_optimizer(ratio_idx, phi);
+ phase->set_ctrl(ratio_idx, cl);
+ Node* add = new (C, 3) AddINode(ratio_idx, diff);
+ phase->_igvn.register_new_node_with_optimizer(add);
+ phase->set_ctrl(add, cl);
+ phase->_igvn.hash_delete( phi2 );
+ phase->_igvn.subsume_node( phi2, add );
+ // Sometimes an induction variable is unused
+ if (add->outcnt() == 0) {
+ phase->_igvn.remove_dead_node(add);
+ }
+ --i; // deleted this phi; rescan starting with next position
+ continue;
+ }
+ }
+ } else if (_parent != NULL && !_irreducible) {
+ // Not a counted loop.
+ // Look for a safepoint on the idom-path to remove, preserving the first one
+ bool found = false;
+ Node* n = tail();
+ for (; n != _head && !found; n = phase->idom(n)) {
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this)
+ found = true; // Found one
+ }
+ // Skip past it and delete the others
+ for (; n != _head; n = phase->idom(n)) {
+ if (n->Opcode() == Op_SafePoint && phase->get_loop(n) == this &&
+ phase->is_deleteable_safept(n))
+ phase->lazy_replace(n,n->in(TypeFunc::Control));
+ }
+ }
+
+ // Recursively
+ if( _child ) _child->counted_loop( phase );
+ if( _next ) _next ->counted_loop( phase );
+}
+
+#ifndef PRODUCT
+//------------------------------dump_head--------------------------------------
+// Dump 1 liner for loop header info
+void IdealLoopTree::dump_head( ) const {
+ for( uint i=0; i<_nest; i++ )
+ tty->print(" ");
+ tty->print("Loop: N%d/N%d ",_head->_idx,_tail->_idx);
+ if( _irreducible ) tty->print(" IRREDUCIBLE");
+ if( _head->is_CountedLoop() ) {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ tty->print(" counted");
+ if( cl->is_pre_loop () ) tty->print(" pre" );
+ if( cl->is_main_loop() ) tty->print(" main");
+ if( cl->is_post_loop() ) tty->print(" post");
+ }
+ tty->cr();
+}
+
+//------------------------------dump-------------------------------------------
+// Dump loops by loop tree
+void IdealLoopTree::dump( ) const {
+ dump_head();
+ if( _child ) _child->dump();
+ if( _next ) _next ->dump();
+}
+
+#endif
+
+//=============================================================================
+//------------------------------PhaseIdealLoop---------------------------------
+// Create a PhaseIdealLoop. Build the ideal Loop tree. Map each Ideal Node to
+// its corresponding LoopNode. If 'optimize' is true, do some loop cleanups.
+PhaseIdealLoop::PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs )
+ : PhaseTransform(Ideal_Loop),
+ _igvn(igvn),
+ _dom_lca_tags(C->comp_arena()) {
+ // Reset major-progress flag for the driver's heuristics
+ C->clear_major_progress();
+
+#ifndef PRODUCT
+ // Capture for later assert
+ uint unique = C->unique();
+ _loop_invokes++;
+ _loop_work += unique;
+#endif
+
+ // True if the method has at least 1 irreducible loop
+ _has_irreducible_loops = false;
+
+ _created_loop_node = false;
+
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ // Pre-grow the mapping from Nodes to IdealLoopTrees.
+ _nodes.map(C->unique(), NULL);
+ memset(_nodes.adr(), 0, wordSize * C->unique());
+
+ // Pre-build the top-level outermost loop tree entry
+ _ltree_root = new IdealLoopTree( this, C->root(), C->root() );
+ // Do not need a safepoint at the top level
+ _ltree_root->_has_sfpt = 1;
+
+ // Empty pre-order array
+ allocate_preorders();
+
+ // Build a loop tree on the fly. Build a mapping from CFG nodes to
+ // IdealLoopTree entries. Data nodes are NOT walked.
+ build_loop_tree();
+ // Check for bailout, and return
+ if (C->failing()) {
+ return;
+ }
+
+ // No loops after all
+ if( !_ltree_root->_child ) C->set_has_loops(false);
+
+ // There should always be an outer loop containing the Root and Return nodes.
+ // If not, we have a degenerate empty program. Bail out in this case.
+ if (!has_node(C->root())) {
+ C->clear_major_progress();
+ C->record_method_not_compilable("empty program detected during loop optimization");
+ return;
+ }
+
+ // Nothing to do, so get out
+ if( !C->has_loops() && !do_split_ifs && !verify_me) {
+ _igvn.optimize(); // Cleanup NeverBranches
+ return;
+ }
+
+ // Set loop nesting depth
+ _ltree_root->set_nest( 0 );
+
+ // Split shared headers and insert loop landing pads.
+ // Do not bother doing this on the Root loop of course.
+ if( !verify_me && _ltree_root->_child ) {
+ if( _ltree_root->_child->beautify_loops( this ) ) {
+ // Re-build loop tree!
+ _ltree_root->_child = NULL;
+ _nodes.clear();
+ reallocate_preorders();
+ build_loop_tree();
+ // Check for bailout, and return
+ if (C->failing()) {
+ return;
+ }
+ // Reset loop nesting depth
+ _ltree_root->set_nest( 0 );
+ }
+ }
+
+ // Build Dominators for elision of NULL checks & loop finding.
+ // Since nodes do not have a slot for immediate dominator, make
+ // a persistent side array for that info indexed on node->_idx.
+ _idom_size = C->unique();
+ _idom = NEW_RESOURCE_ARRAY( Node*, _idom_size );
+ _dom_depth = NEW_RESOURCE_ARRAY( uint, _idom_size );
+ _dom_stk = NULL; // Allocated on demand in recompute_dom_depth
+ memset( _dom_depth, 0, _idom_size * sizeof(uint) );
+
+ Dominators();
+
+ // As a side effect, Dominators removed any unreachable CFG paths
+ // into RegionNodes. It doesn't do this test against Root, so
+ // we do it here.
+ for( uint i = 1; i < C->root()->req(); i++ ) {
+ if( !_nodes[C->root()->in(i)->_idx] ) { // Dead path into Root?
+ _igvn.hash_delete(C->root());
+ C->root()->del_req(i);
+ _igvn._worklist.push(C->root());
+ i--; // Rerun same iteration on compressed edges
+ }
+ }
+
+ // Given dominators, try to find inner loops with calls that must
+ // always be executed (call dominates loop tail). These loops do
+ // not need a separate safepoint.
+ Node_List cisstack(a);
+ _ltree_root->check_safepts(visited, cisstack);
+
+ // Walk the DATA nodes and place into loops. Find earliest control
+ // node. For CFG nodes, the _nodes array starts out and remains
+ // holding the associated IdealLoopTree pointer. For DATA nodes, the
+ // _nodes array holds the earliest legal controlling CFG node.
+
+ // Allocate stack with enough space to avoid frequent realloc
+ int stack_size = (C->unique() >> 1) + 16; // (unique>>1)+16 from Java2D stats
+ Node_Stack nstack( a, stack_size );
+
+ visited.Clear();
+ Node_List worklist(a);
+ // Don't need C->root() on worklist since
+ // it will be processed among C->top() inputs
+ worklist.push( C->top() );
+ visited.set( C->top()->_idx ); // Set C->top() as visited now
+ build_loop_early( visited, worklist, nstack, verify_me );
+
+ // Given early legal placement, try finding counted loops. This placement
+ // is good enough to discover most loop invariants.
+ if( !verify_me )
+ _ltree_root->counted_loop( this );
+
+ // Find latest loop placement. Find ideal loop placement.
+ visited.Clear();
+ init_dom_lca_tags();
+ // Need C->root() on worklist when processing outs
+ worklist.push( C->root() );
+ NOT_PRODUCT( C->verify_graph_edges(); )
+ worklist.push( C->top() );
+ build_loop_late( visited, worklist, nstack, verify_me );
+
+ // clear out the dead code
+ while(_deadlist.size()) {
+ igvn.remove_globally_dead_node(_deadlist.pop());
+ }
+
+#ifndef PRODUCT
+ C->verify_graph_edges();
+ if( verify_me ) { // Nested verify pass?
+ // Check to see if the verify mode is broken
+ assert(C->unique() == unique, "non-optimize mode made Nodes? ? ?");
+ return;
+ }
+ if( VerifyLoopOptimizations ) verify();
+#endif
+
+ if (ReassociateInvariants) {
+ // Reassociate invariants and prep for split_thru_phi
+ for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+ IdealLoopTree* lpt = iter.current();
+ if (!lpt->is_counted() || !lpt->is_inner()) continue;
+
+ lpt->reassociate_invariants(this);
+
+ // Because RCE opportunities can be masked by split_thru_phi,
+ // look for RCE candidates and inhibit split_thru_phi
+ // on just their loop-phi's for this pass of loop opts
+ if( SplitIfBlocks && do_split_ifs ) {
+ if (lpt->policy_range_check(this)) {
+ lpt->_rce_candidate = true;
+ }
+ }
+ }
+ }
+
+ // Check for aggressive application of split-if and other transforms
+ // that require basic-block info (like cloning through Phi's)
+ if( SplitIfBlocks && do_split_ifs ) {
+ visited.Clear();
+ split_if_with_blocks( visited, nstack );
+ NOT_PRODUCT( if( VerifyLoopOptimizations ) verify(); );
+ }
+
+ // Perform iteration-splitting on inner loops. Split iterations to avoid
+ // range checks or one-shot null checks.
+
+ // If split-if's didn't hack the graph too bad (no CFG changes)
+ // then do loop opts.
+ if( C->has_loops() && !C->major_progress() ) {
+ memset( worklist.adr(), 0, worklist.Size()*sizeof(Node*) );
+ _ltree_root->_child->iteration_split( this, worklist );
+ // No verify after peeling! GCM has hoisted code out of the loop.
+ // After peeling, the hoisted code could sink inside the peeled area.
+ // The peeling code does not try to recompute the best location for
+ // all the code before the peeled area, so the verify pass will always
+ // complain about it.
+ }
+ // Do verify graph edges in any case
+ NOT_PRODUCT( C->verify_graph_edges(); );
+
+ if( !do_split_ifs ) {
+ // We saw major progress in Split-If to get here. We forced a
+ // pass with unrolling and not split-if, however more split-if's
+ // might make progress. If the unrolling didn't make progress
+ // then the major-progress flag got cleared and we won't try
+ // another round of Split-If. In particular the ever-common
+ // instance-of/check-cast pattern requires at least 2 rounds of
+ // Split-If to clear out.
+ C->set_major_progress();
+ }
+
+ // Repeat loop optimizations if new loops were seen
+ if (created_loop_node()) {
+ C->set_major_progress();
+ }
+
+ // Convert scalar to superword operations
+
+ if (UseSuperWord && C->has_loops() && !C->major_progress()) {
+ // SuperWord transform
+ SuperWord sw(this);
+ for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+ IdealLoopTree* lpt = iter.current();
+ if (lpt->is_counted()) {
+ sw.transform_loop(lpt);
+ }
+ }
+ }
+
+ // Cleanup any modified bits
+ _igvn.optimize();
+
+ // Do not repeat loop optimizations if irreducible loops are present
+ // by claiming no-progress.
+ if( _has_irreducible_loops )
+ C->clear_major_progress();
+}
+
+#ifndef PRODUCT
+//------------------------------print_statistics-------------------------------
+int PhaseIdealLoop::_loop_invokes=0;// Count of PhaseIdealLoop invokes
+int PhaseIdealLoop::_loop_work=0; // Sum of PhaseIdealLoop x unique
+void PhaseIdealLoop::print_statistics() {
+ tty->print_cr("PhaseIdealLoop=%d, sum _unique=%d", _loop_invokes, _loop_work);
+}
+
+//------------------------------verify-----------------------------------------
+// Build a verify-only PhaseIdealLoop, and see that it agrees with me.
+static int fail; // debug only, so multi-thread races don't matter
+void PhaseIdealLoop::verify() const {
+ int old_progress = C->major_progress();
+ ResourceMark rm;
+ PhaseIdealLoop loop_verify( _igvn, this, false );
+ VectorSet visited(Thread::current()->resource_area());
+
+ fail = 0;
+ verify_compare( C->root(), &loop_verify, visited );
+ assert( fail == 0, "verify loops failed" );
+ // Verify loop structure is the same
+ _ltree_root->verify_tree(loop_verify._ltree_root, NULL);
+ // Reset major-progress. It was cleared by creating a verify version of
+ // PhaseIdealLoop.
+ for( int i=0; i<old_progress; i++ )
+ C->set_major_progress();
+}
+
+//------------------------------verify_compare---------------------------------
+// Make sure me and the given PhaseIdealLoop agree on key data structures
+void PhaseIdealLoop::verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const {
+ if( !n ) return;
+ if( visited.test_set( n->_idx ) ) return;
+ if( !_nodes[n->_idx] ) { // Unreachable
+ assert( !loop_verify->_nodes[n->_idx], "both should be unreachable" );
+ return;
+ }
+
+ uint i;
+ for( i = 0; i < n->req(); i++ )
+ verify_compare( n->in(i), loop_verify, visited );
+
+ // Check the '_nodes' block/loop structure
+ i = n->_idx;
+ if( has_ctrl(n) ) { // We have control; verify has loop or ctrl
+ if( _nodes[i] != loop_verify->_nodes[i] &&
+ get_ctrl_no_update(n) != loop_verify->get_ctrl_no_update(n) ) {
+ tty->print("Mismatched control setting for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ Node *c = get_ctrl_no_update(n);
+ tty->print("We have it as: ");
+ if( c->in(0) ) c->dump();
+ else tty->print_cr("N%d",c->_idx);
+ tty->print("Verify thinks: ");
+ if( loop_verify->has_ctrl(n) )
+ loop_verify->get_ctrl_no_update(n)->dump();
+ else
+ loop_verify->get_loop_idx(n)->dump();
+ tty->cr();
+ }
+ } else { // We have a loop
+ IdealLoopTree *us = get_loop_idx(n);
+ if( loop_verify->has_ctrl(n) ) {
+ tty->print("Mismatched loop setting for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ us->dump();
+ tty->print("Verify thinks: ");
+ loop_verify->get_ctrl_no_update(n)->dump();
+ tty->cr();
+ } else if (!C->major_progress()) {
+ // Loop selection can be messed up if we did a major progress
+ // operation, like split-if. Do not verify in that case.
+ IdealLoopTree *them = loop_verify->get_loop_idx(n);
+ if( us->_head != them->_head || us->_tail != them->_tail ) {
+ tty->print("Unequals loops for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ us->dump();
+ tty->print("Verify thinks: ");
+ them->dump();
+ tty->cr();
+ }
+ }
+ }
+
+ // Check for immediate dominators being equal
+ if( i >= _idom_size ) {
+ if( !n->is_CFG() ) return;
+ tty->print("CFG Node with no idom: ");
+ n->dump();
+ return;
+ }
+ if( !n->is_CFG() ) return;
+ if( n == C->root() ) return; // No IDOM here
+
+ assert(n->_idx == i, "sanity");
+ Node *id = idom_no_update(n);
+ if( id != loop_verify->idom_no_update(n) ) {
+ tty->print("Unequals idoms for: ");
+ n->dump();
+ if( fail++ > 10 ) return;
+ tty->print("We have it as: ");
+ id->dump();
+ tty->print("Verify thinks: ");
+ loop_verify->idom_no_update(n)->dump();
+ tty->cr();
+ }
+
+}
+
+//------------------------------verify_tree------------------------------------
+// Verify that tree structures match. Because the CFG can change, siblings
+// within the loop tree can be reordered. We attempt to deal with that by
+// reordering the verify's loop tree if possible.
+void IdealLoopTree::verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const {
+ assert( _parent == parent, "Badly formed loop tree" );
+
+ // Siblings not in same order? Attempt to re-order.
+ if( _head != loop->_head ) {
+ // Find _next pointer to update
+ IdealLoopTree **pp = &loop->_parent->_child;
+ while( *pp != loop )
+ pp = &((*pp)->_next);
+ // Find proper sibling to be next
+ IdealLoopTree **nn = &loop->_next;
+ while( (*nn) && (*nn)->_head != _head )
+ nn = &((*nn)->_next);
+
+ // Check for no match.
+ if( !(*nn) ) {
+ // Annoyingly, irreducible loops can pick different headers
+ // after a major_progress operation, so the rest of the loop
+ // tree cannot be matched.
+ if (_irreducible && Compile::current()->major_progress()) return;
+ assert( 0, "failed to match loop tree" );
+ }
+
+ // Move (*nn) to (*pp)
+ IdealLoopTree *hit = *nn;
+ *nn = hit->_next;
+ hit->_next = loop;
+ *pp = loop;
+ loop = hit;
+ // Now try again to verify
+ }
+
+ assert( _head == loop->_head , "mismatched loop head" );
+ Node *tail = _tail; // Inline a non-updating version of
+ while( !tail->in(0) ) // the 'tail()' call.
+ tail = tail->in(1);
+ assert( tail == loop->_tail, "mismatched loop tail" );
+
+ // Counted loops that are guarded should be able to find their guards
+ if( _head->is_CountedLoop() && _head->as_CountedLoop()->is_main_loop() ) {
+ CountedLoopNode *cl = _head->as_CountedLoop();
+ Node *init = cl->init_trip();
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ assert( ctrl->Opcode() == Op_IfTrue || ctrl->Opcode() == Op_IfFalse, "" );
+ Node *iff = ctrl->in(0);
+ assert( iff->Opcode() == Op_If, "" );
+ Node *bol = iff->in(1);
+ assert( bol->Opcode() == Op_Bool, "" );
+ Node *cmp = bol->in(1);
+ assert( cmp->Opcode() == Op_CmpI, "" );
+ Node *add = cmp->in(1);
+ Node *opaq;
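+ // The guard's Opaque1 limit is either cmp->in(1) itself or, when cmp->in(1) is the init value, cmp->in(2).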
+ if( add->Opcode() == Op_Opaque1 ) {
+ opaq = add;
+ } else {
+ assert( add->Opcode() == Op_AddI || add->Opcode() == Op_ConI , "" );
+ assert( add == init, "" );
+ opaq = cmp->in(2);
+ }
+ assert( opaq->Opcode() == Op_Opaque1, "" );
+
+ }
+
+ if (_child != NULL) _child->verify_tree(loop->_child, this);
+ if (_next != NULL) _next ->verify_tree(loop->_next, parent);
+ // Innermost loops need to verify loop bodies,
+ // but only if no 'major_progress'
+ int fail = 0;
+ if (!Compile::current()->major_progress() && _child == NULL) {
+ for( uint i = 0; i < _body.size(); i++ ) {
+ Node *n = _body.at(i);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ uint j;
+ for( j = 0; j < loop->_body.size(); j++ )
+ if( loop->_body.at(j) == n )
+ break;
+ if( j == loop->_body.size() ) { // Not found in loop body
+ // Last-ditch effort to avoid the assertion: it's possible that we
+ // have some users (so outcnt is not zero) but are still dead.
+ // Try to find from root.
+ if (Compile::current()->root()->find(n->_idx)) {
+ fail++;
+ tty->print("We have that verify does not: ");
+ n->dump();
+ }
+ }
+ }
+ for( uint i2 = 0; i2 < loop->_body.size(); i2++ ) {
+ Node *n = loop->_body.at(i2);
+ if (n->outcnt() == 0) continue; // Ignore dead
+ uint j;
+ for( j = 0; j < _body.size(); j++ )
+ if( _body.at(j) == n )
+ break;
+ if( j == _body.size() ) { // Not found in loop body
+ // Last-ditch effort to avoid the assertion: it's possible that we
+ // have some users (so outcnt is not zero) but are still dead.
+ // Try to find from root.
+ if (Compile::current()->root()->find(n->_idx)) {
+ fail++;
+ tty->print("Verify has that we do not: ");
+ n->dump();
+ }
+ }
+ }
+ assert( !fail, "loop body mismatch" );
+ }
+}
+
+#endif
+
+//------------------------------set_idom---------------------------------------
+void PhaseIdealLoop::set_idom(Node* d, Node* n, uint dom_depth) {
+ uint idx = d->_idx;
+ if (idx >= _idom_size) {
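+ // Grow the idom/depth side arrays by doubling until idx fits; new depth entries start at 0.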
+ uint newsize = _idom_size<<1;
+ while( idx >= newsize ) {
+ newsize <<= 1;
+ }
+ _idom = REALLOC_RESOURCE_ARRAY( Node*, _idom,_idom_size,newsize);
+ _dom_depth = REALLOC_RESOURCE_ARRAY( uint, _dom_depth,_idom_size,newsize);
+ memset( _dom_depth + _idom_size, 0, (newsize - _idom_size) * sizeof(uint) );
+ _idom_size = newsize;
+ }
+ _idom[idx] = n;
+ _dom_depth[idx] = dom_depth;
+}
+
+//------------------------------recompute_dom_depth---------------------------------------
+// The dominator tree is constructed with only parent pointers.
+// This recomputes the depth in the tree by first tagging all
+// nodes as "no depth yet" marker. The next pass then runs up
+// the dom tree from each node marked "no depth yet", and computes
+// the depth on the way back down.
+void PhaseIdealLoop::recompute_dom_depth() {
+ uint no_depth_marker = C->unique();
+ uint i;
+ // Initialize depth to "no depth yet"
+ for (i = 0; i < _idom_size; i++) {
+ if (_dom_depth[i] > 0 && _idom[i] != NULL) {
+ _dom_depth[i] = no_depth_marker;
+ }
+ }
+ if (_dom_stk == NULL) {
+ uint init_size = C->unique() / 100; // Guess that 1/100 is a reasonable initial size.
+ if (init_size < 10) init_size = 10;
+ _dom_stk = new (C->node_arena()) GrowableArray<uint>(C->node_arena(), init_size, 0, 0);
+ }
+ // Compute new depth for each node.
+ for (i = 0; i < _idom_size; i++) {
+ uint j = i;
+ // Run up the dom tree to find a node with a depth
+ while (_dom_depth[j] == no_depth_marker) {
+ _dom_stk->push(j);
+ j = _idom[j]->_idx;
+ }
+ // Compute the depth on the way back down this tree branch
+ uint dd = _dom_depth[j] + 1;
+ while (_dom_stk->length() > 0) {
+ uint j = _dom_stk->pop();
+ _dom_depth[j] = dd;
+ dd++;
+ }
+ }
+}
+
+//------------------------------sort-------------------------------------------
+// Insert 'loop' into the existing loop tree. 'innermost' is a leaf of the
+// loop tree, not the root.
+IdealLoopTree *PhaseIdealLoop::sort( IdealLoopTree *loop, IdealLoopTree *innermost ) {
+ if( !innermost ) return loop; // New innermost loop
+
+ int loop_preorder = get_preorder(loop->_head); // Cache pre-order number
+ assert( loop_preorder, "not yet post-walked loop" );
+ IdealLoopTree **pp = &innermost; // Pointer to previous next-pointer
+ IdealLoopTree *l = *pp; // Do I go before or after 'l'?
+
+ // Insert at start of list
+ while( l ) { // Insertion sort based on pre-order
+ if( l == loop ) return innermost; // Already on list!
+ int l_preorder = get_preorder(l->_head); // Cache pre-order number
+ assert( l_preorder, "not yet post-walked l" );
+ // Check header pre-order number to figure proper nesting
+ if( loop_preorder > l_preorder )
+ break; // End of insertion
+ // If headers tie (e.g., shared headers) check tail pre-order numbers.
+ // Since I split shared headers, you'd think this could not happen.
+ // BUT: I must first do the preorder numbering before I can discover I
+ // have shared headers, so the split headers all get the same preorder
+ // number as the RegionNode they split from.
+ if( loop_preorder == l_preorder &&
+ get_preorder(loop->_tail) < get_preorder(l->_tail) )
+ break; // Also check for shared headers (same pre#)
+ pp = &l->_parent; // Chain up list
+ l = *pp;
+ }
+ // Link into list
+ // Point predecessor to me
+ *pp = loop;
+ // Point me to successor
+ IdealLoopTree *p = loop->_parent;
+ loop->_parent = l; // Point me to successor
+ if( p ) sort( p, innermost ); // Insert my parents into list as well
+ return innermost;
+}
+
+//------------------------------build_loop_tree--------------------------------
+// I use a modified Vick/Tarjan algorithm. I need pre- and post-visit
+// bits. The _nodes[] array is mapped by Node index and holds a NULL for
+// not-yet-pre-walked, pre-order # for pre-but-not-post-walked and holds the
+// tightest enclosing IdealLoopTree for post-walked.
+//
+// During my forward walk I do a short 1-layer lookahead to see if I can find
+// a loop backedge that doesn't have any work on the backedge. This
+// helps me construct nested loops with shared headers better.
+//
+// Once I've done the forward recursion, I do the post-work. For each child
+// I check to see if there is a backedge. Backedges define a loop! I
+// insert an IdealLoopTree at the target of the backedge.
+//
+// During the post-work I also check to see if I have several children
+// belonging to different loops. If so, then this Node is a decision point
+// where control flow can choose to change loop nests. It is at this
+// decision point where I can figure out how loops are nested. At this
+// time I can properly order the different loop nests from my children.
+// Note that there may not be any backedges at the decision point!
+//
+// Since the decision point can be far removed from the backedges, I can't
+// order my loops at the time I discover them. Thus at the decision point
+// I need to inspect loop header pre-order numbers to properly nest my
+// loops. This means I need to sort my children's loops by pre-order.
+// The sort is of size number-of-control-children, which generally limits
+// it to size 2 (i.e., I just choose between my 2 target loops).
+void PhaseIdealLoop::build_loop_tree() {
+ // Allocate stack of size C->unique()/2 to avoid frequent realloc
+ GrowableArray <Node *> bltstack(C->unique() >> 1);
+ Node *n = C->root();
+ bltstack.push(n);
+ int pre_order = 1;
+ int stack_size;
+
+ while ( ( stack_size = bltstack.length() ) != 0 ) {
+ n = bltstack.top(); // Leave node on stack
+ if ( !is_visited(n) ) {
+ // ---- Pre-pass Work ----
+ // Pre-walked but not post-walked nodes need a pre_order number.
+
+ set_preorder_visited( n, pre_order ); // set as visited
+
+ // ---- Scan over children ----
+ // Scan first over control projections that lead to loop headers.
+ // This helps us find inner-to-outer loops with shared headers better.
+
+ // Scan children's children for loop headers.
+ for ( int i = n->outcnt() - 1; i >= 0; --i ) {
+ Node* m = n->raw_out(i); // Child
+ if( m->is_CFG() && !is_visited(m) ) { // Only for CFG children
+ // Scan over children's children to find loop
+ for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
+ Node* l = m->fast_out(j);
+ if( is_visited(l) && // Been visited?
+ !is_postvisited(l) && // But not post-visited
+ get_preorder(l) < pre_order ) { // And smaller pre-order
+ // Found! Scan the DFS down this path before doing other paths
+ bltstack.push(m);
+ break;
+ }
+ }
+ }
+ }
+ pre_order++;
+ }
+ else if ( !is_postvisited(n) ) {
+ // Note: build_loop_tree_impl() adds out edges on rare occasions,
+ // such as com.sun.rsasign.am::a.
+ // For non-recursive version, first, process current children.
+ // On next iteration, check if additional children were added.
+ for ( int k = n->outcnt() - 1; k >= 0; --k ) {
+ Node* u = n->raw_out(k);
+ if ( u->is_CFG() && !is_visited(u) ) {
+ bltstack.push(u);
+ }
+ }
+ if ( bltstack.length() == stack_size ) {
+ // There were no additional children, post visit node now
+ (void)bltstack.pop(); // Remove node from stack
+ pre_order = build_loop_tree_impl( n, pre_order );
+ // Check for bailout
+ if (C->failing()) {
+ return;
+ }
+ // Check to grow _preorders[] array for the case when
+ // build_loop_tree_impl() adds new nodes.
+ check_grow_preorders();
+ }
+ }
+ else {
+ (void)bltstack.pop(); // Remove post-visited node from stack
+ }
+ }
+}
+
+//------------------------------build_loop_tree_impl---------------------------
+int PhaseIdealLoop::build_loop_tree_impl( Node *n, int pre_order ) {
+ // ---- Post-pass Work ----
+ // Pre-walked but not post-walked nodes need a pre_order number.
+
+ // Tightest enclosing loop for this Node
+ IdealLoopTree *innermost = NULL;
+
+ // For all children, see if any edge is a backedge. If so, make a loop
+ // for it. Then find the tightest enclosing loop for the self Node.
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* m = n->fast_out(i); // Child
+ if( n == m ) continue; // Ignore control self-cycles
+ if( !m->is_CFG() ) continue;// Ignore non-CFG edges
+
+ IdealLoopTree *l; // Child's loop
+ if( !is_postvisited(m) ) { // Child visited but not post-visited?
+ // Found a backedge
+ assert( get_preorder(m) < pre_order, "should be backedge" );
+ // Check for the RootNode, which is already a LoopNode and is allowed
+ // to have multiple "backedges".
+ if( m == C->root()) { // Found the root?
+ l = _ltree_root; // Root is the outermost LoopNode
+ } else { // Else found a nested loop
+ // Insert a LoopNode to mark this loop.
+ l = new IdealLoopTree(this, m, n);
+ } // End of Else found a nested loop
+ if( !has_loop(m) ) // If 'm' does not already have a loop set
+ set_loop(m, l); // Set loop header to loop now
+
+ } else { // Else not a nested loop
+ if( !_nodes[m->_idx] ) continue; // Dead code has no loop
+ l = get_loop(m); // Get previously determined loop
+ // If successor is header of a loop (nest), move up-loop till it
+ // is a member of some outer enclosing loop. Since there are no
+ // shared headers (I've split them already) I only need to go up
+ // at most 1 level.
+ while( l && l->_head == m ) // Successor heads loop?
+ l = l->_parent; // Move up 1 for me
+ // If this loop is not properly parented, then this loop
+ // has no exit path out, i.e., it's an infinite loop.
+ if( !l ) {
+ // Make loop "reachable" from root so the CFG is reachable. Basically
+ // insert a bogus loop exit that is never taken. 'm', the loop head,
+ // points to 'n', one (of possibly many) fall-in paths. There may be
+ // many backedges as well.
+
+ // Here I set the loop to be the root loop. I could have, after
+ // inserting a bogus loop exit, restarted the recursion and found my
+ // new loop exit. This would make the infinite loop a first-class
+ // loop and it would then get properly optimized. What's the use of
+ // optimizing an infinite loop?
+ l = _ltree_root; // Oops, found infinite loop
+
+ // Insert the NeverBranch between 'm' and its control user.
+ NeverBranchNode *iff = new (C, 1) NeverBranchNode( m );
+ _igvn.register_new_node_with_optimizer(iff);
+ set_loop(iff, l);
+ Node *if_t = new (C, 1) CProjNode( iff, 0 );
+ _igvn.register_new_node_with_optimizer(if_t);
+ set_loop(if_t, l);
+
+ Node* cfg = NULL; // Find the One True Control User of m
+ for (DUIterator_Fast jmax, j = m->fast_outs(jmax); j < jmax; j++) {
+ Node* x = m->fast_out(j);
+ if (x->is_CFG() && x != m && x != iff)
+ { cfg = x; break; }
+ }
+ assert(cfg != NULL, "must find the control user of m");
+ uint k = 0; // Probably cfg->in(0)
+ while( cfg->in(k) != m ) k++; // But check in case cfg is a Region
+ cfg->set_req( k, if_t ); // Now point to NeverBranch
+
+ // Now create the never-taken loop exit
+ Node *if_f = new (C, 1) CProjNode( iff, 1 );
+ _igvn.register_new_node_with_optimizer(if_f);
+ set_loop(if_f, l);
+ // Find frame ptr for Halt. Relies on the optimizer
+ // V-N'ing. Easier and quicker than searching through
+ // the program structure.
+ Node *frame = new (C, 1) ParmNode( C->start(), TypeFunc::FramePtr );
+ _igvn.register_new_node_with_optimizer(frame);
+ // Halt & Catch Fire
+ Node *halt = new (C, TypeFunc::Parms) HaltNode( if_f, frame );
+ _igvn.register_new_node_with_optimizer(halt);
+ set_loop(halt, l);
+ C->root()->add_req(halt);
+ set_loop(C->root(), _ltree_root);
+ }
+ }
+ // Weeny check for irreducible. This child was already visited (this
+ // IS the post-work phase). Is this child's loop header post-visited
+ // as well? If so, then I found another entry into the loop.
+ while( is_postvisited(l->_head) ) {
+ // found irreducible
+ l->_irreducible = true;
+ l = l->_parent;
+ _has_irreducible_loops = true;
+ // Check for bad CFG here to prevent crash, and bailout of compile
+ if (l == NULL) {
+ C->record_method_not_compilable("unhandled CFG detected during loop optimization");
+ return pre_order;
+ }
+ }
+
+ // This Node might be a decision point for loops. It is only if
+ // its children belong to several different loops. The sort call
+ // does a trivial amount of work if there is only 1 child or all
+ // children belong to the same loop. If however, the children
+ // belong to different loops, the sort call will properly set the
+ // _parent pointers to show how the loops nest.
+ //
+ // In any case, it returns the tightest enclosing loop.
+ innermost = sort( l, innermost );
+ }
+
+ // Def-use info will have some dead stuff; dead stuff will have no
+ // loop decided on.
+
+ // Am I a loop header? If so fix up my parent's child and next ptrs.
+ if( innermost && innermost->_head == n ) {
+ assert( get_loop(n) == innermost, "" );
+ IdealLoopTree *p = innermost->_parent;
+ IdealLoopTree *l = innermost;
+ while( p && l->_head == n ) {
+ l->_next = p->_child; // Put self on parent's child list
+ p->_child = l; // Make self as first child of parent
+ l = p; // Now walk up the parent chain
+ p = l->_parent;
+ }
+ } else {
+ // Note that it is possible for a LoopNode to reach here, if the
+ // backedge has been made unreachable (hence the LoopNode no longer
+ // denotes a Loop, and will eventually be removed).
+
+ // Record tightest enclosing loop for self. Mark as post-visited.
+ set_loop(n, innermost);
+ // Also record has_call flag early on
+ if( innermost ) {
+ if( n->is_Call() && !n->is_CallLeaf() && !n->is_macro() ) {
+ // Do not count uncommon calls
+ if( !n->is_CallStaticJava() || !n->as_CallStaticJava()->_name ) {
+ Node *iff = n->in(0)->in(0);
+ if( !iff->is_If() ||
+ (n->in(0)->Opcode() == Op_IfFalse &&
+ (1.0 - iff->as_If()->_prob) >= 0.01) ||
+ (iff->as_If()->_prob >= 0.01) )
+ innermost->_has_call = 1;
+ }
+ }
+ }
+ }
+
+ // Flag as post-visited now
+ set_postvisited(n);
+ return pre_order;
+}
+
+
+//------------------------------build_loop_early-------------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// First pass computes the earliest controlling node possible. This is the
+// controlling input with the deepest dominating depth.
+void PhaseIdealLoop::build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+ while (worklist.size() != 0) {
+ // Use local variables nstack_top_n & nstack_top_i to cache values
+ // on nstack's top.
+ Node *nstack_top_n = worklist.pop();
+ uint nstack_top_i = 0;
+//while_nstack_nonempty:
+ while (true) {
+ // Get parent node and next input's index from stack's top.
+ Node *n = nstack_top_n;
+ uint i = nstack_top_i;
+ uint cnt = n->req(); // Count of inputs
+ if (i == 0) { // Pre-process the node.
+ if( has_node(n) && // Have either loop or control already?
+ !has_ctrl(n) ) { // Have loop picked out already?
+ // During "merge_many_backedges" we fold up several nested loops
+ // into a single loop. This makes the members of the original
+ // loop bodies point to dead loops; they need to move up
+ // to the new UNION'd larger loop. I set the _head field of these
+ // dead loops to NULL and the _parent field points to the owning
+ // loop. Shades of UNION-FIND algorithm.
+ IdealLoopTree *ilt;
+ while( !(ilt = get_loop(n))->_head ) {
+ // Normally I would use a set_loop here. But in this one special
+ // case, it is legal (and expected) to change what loop a Node
+ // belongs to.
+ _nodes.map(n->_idx, (Node*)(ilt->_parent) );
+ }
+ // Remove safepoints ONLY if I've already seen I don't need one.
+ // (the old code here would yank a 2nd safepoint after seeing a
+ // first one, even though the 1st did not dominate in the loop body
+ // and thus could be avoided indefinitely)
+ if( !verify_me && ilt->_has_sfpt && n->Opcode() == Op_SafePoint &&
+ is_deleteable_safept(n)) {
+ Node *in = n->in(TypeFunc::Control);
+ lazy_replace(n,in); // Pull safepoint now
+ // Carry on with the recursion "as if" we are walking
+ // only the control input
+ if( !visited.test_set( in->_idx ) ) {
+ worklist.push(in); // Visit this guy later, using worklist
+ }
+ // Get next node from nstack:
+ // - skip n's inputs processing by setting i > cnt;
+ // - we also will not call set_early_ctrl(n) since
+ // has_node(n) == true (see the condition above).
+ i = cnt + 1;
+ }
+ }
+ } // if (i == 0)
+
+ // Visit all inputs
+ bool done = true; // Assume all n's inputs will be processed
+ while (i < cnt) {
+ Node *in = n->in(i);
+ ++i;
+ if (in == NULL) continue;
+ if (in->pinned() && !in->is_CFG())
+ set_ctrl(in, in->in(0));
+ int is_visited = visited.test_set( in->_idx );
+ if (!has_node(in)) { // No controlling input yet?
+ assert( !in->is_CFG(), "CFG Node with no controlling input?" );
+ assert( !is_visited, "visit only once" );
+ nstack.push(n, i); // Save parent node and next input's index.
+ nstack_top_n = in; // Process current input now.
+ nstack_top_i = 0;
+ done = false; // Not all n's inputs processed.
+ break; // continue while_nstack_nonempty;
+ } else if (!is_visited) {
+ // This guy has a location picked out for him, but has not yet
+ // been visited. Happens to all CFG nodes, for instance.
+ // Visit him using the worklist instead of recursion, to break
+ // cycles. Since he has a location already we do not need to
+ // find his location before proceeding with the current Node.
+ worklist.push(in); // Visit this guy later, using worklist
+ }
+ }
+ if (done) {
+ // All of n's inputs have been processed, complete post-processing.
+
+ // Compute the earliest point this Node can go.
+ // CFG, Phi, pinned nodes already know their controlling input.
+ if (!has_node(n)) {
+ // Record earliest legal location
+ set_early_ctrl( n );
+ }
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist.
+ break;
+ }
+ // Get saved parent node and next input's index.
+ nstack_top_n = nstack.node();
+ nstack_top_i = nstack.index();
+ nstack.pop();
+ }
+ } // while (true)
+ }
+}
+
+//------------------------------dom_lca_internal--------------------------------
+// Pair-wise LCA
+Node *PhaseIdealLoop::dom_lca_internal( Node *n1, Node *n2 ) const {
+ if( !n1 ) return n2; // Handle NULL original LCA
+ assert( n1->is_CFG(), "" );
+ assert( n2->is_CFG(), "" );
+ // find LCA of all uses
+ uint d1 = dom_depth(n1);
+ uint d2 = dom_depth(n2);
+ while (n1 != n2) {
+ if (d1 > d2) {
+ n1 = idom(n1);
+ d1 = dom_depth(n1);
+ } else if (d1 < d2) {
+ n2 = idom(n2);
+ d2 = dom_depth(n2);
+ } else {
+ // Here d1 == d2. Due to edits of the dominator-tree, sections
+ // of the tree might have the same depth. These sections have
+ // to be searched more carefully.
+
+ // Scan up all the n1's with equal depth, looking for n2.
+ Node *t1 = idom(n1);
+ while (dom_depth(t1) == d1) {
+ if (t1 == n2) return n2;
+ t1 = idom(t1);
+ }
+ // Scan up all the n2's with equal depth, looking for n1.
+ Node *t2 = idom(n2);
+ while (dom_depth(t2) == d2) {
+ if (t2 == n1) return n1;
+ t2 = idom(t2);
+ }
+ // Move up to a new dominator-depth value as well as up the dom-tree.
+ n1 = t1;
+ n2 = t2;
+ d1 = dom_depth(n1);
+ d2 = dom_depth(n2);
+ }
+ }
+ return n1;
+}
+
+//------------------------------compute_idom-----------------------------------
+// Locally compute IDOM using dom_lca call. Correct only if the incoming
+// IDOMs are correct.
+Node *PhaseIdealLoop::compute_idom( Node *region ) const {
+ assert( region->is_Region(), "" );
+ Node *LCA = NULL;
+ for( uint i = 1; i < region->req(); i++ ) {
+ if( region->in(i) != C->top() )
+ LCA = dom_lca( LCA, region->in(i) );
+ }
+ return LCA;
+}
+
+//------------------------------get_late_ctrl----------------------------------
+// Compute latest legal control.
+Node *PhaseIdealLoop::get_late_ctrl( Node *n, Node *early ) {
+ assert(early != NULL, "early control should not be NULL");
+
+ // Compute LCA over list of uses
+ Node *LCA = NULL;
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax && LCA != early; i++) {
+ Node* c = n->fast_out(i);
+ if (_nodes[c->_idx] == NULL)
+ continue; // Skip the occasional dead node
+ if( c->is_Phi() ) { // For Phis, we must land above on the path
+ for( uint j=1; j<c->req(); j++ ) {// For all inputs
+ if( c->in(j) == n ) { // Found matching input?
+ Node *use = c->in(0)->in(j);
+ LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+ }
+ }
+ } else {
+ // For CFG data-users, use is in the block just prior
+ Node *use = has_ctrl(c) ? get_ctrl(c) : c->in(0);
+ LCA = dom_lca_for_get_late_ctrl( LCA, use, n );
+ }
+ }
+
+ // if this is a load, check for anti-dependent stores
+ // We use a conservative algorithm to identify potential interfering
+ // instructions and for rescheduling the load. The users of the memory
+ // input of this load are examined. Any use which is not a load and is
+ // dominated by early is considered a potentially interfering store.
+ // This can produce false positives.
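+ // For example (illustrative): if n loads a[i] and another user of the same
+ // memory state stores to a[j] at a control dominated by 'early', that
+ // store's control is folded into the LCA, so the chosen late control
+ // dominates the store and the load is never scheduled below it.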
+ if (n->is_Load() && LCA != early) {
+ Node_List worklist;
+
+ Node *mem = n->in(MemNode::Memory);
+ for (DUIterator_Fast imax, i = mem->fast_outs(imax); i < imax; i++) {
+ Node* s = mem->fast_out(i);
+ worklist.push(s);
+ }
+ while(worklist.size() != 0 && LCA != early) {
+ Node* s = worklist.pop();
+ if (s->is_Load()) {
+ continue;
+ } else if (s->is_MergeMem()) {
+ for (DUIterator_Fast imax, i = s->fast_outs(imax); i < imax; i++) {
+ Node* s1 = s->fast_out(i);
+ worklist.push(s1);
+ }
+ } else {
+ Node *sctrl = has_ctrl(s) ? get_ctrl(s) : s->in(0);
+ assert(sctrl != NULL || s->outcnt() == 0, "must have control");
+ if (sctrl != NULL && !sctrl->is_top() && is_dominator(early, sctrl)) {
+ LCA = dom_lca_for_get_late_ctrl(LCA, sctrl, n);
+ }
+ }
+ }
+ }
+
+ assert(LCA == find_non_split_ctrl(LCA), "unexpected late control");
+ return LCA;
+}
+
+// true if CFG node d dominates CFG node n
+bool PhaseIdealLoop::is_dominator(Node *d, Node *n) {
+ if (d == n)
+ return true;
+ assert(d->is_CFG() && n->is_CFG(), "must have CFG nodes");
+ uint dd = dom_depth(d);
+ while (dom_depth(n) >= dd) {
+ if (n == d)
+ return true;
+ n = idom(n);
+ }
+ return false;
+}
+
+//------------------------------dom_lca_for_get_late_ctrl_internal-------------
+// Pair-wise LCA with tags.
+// Tag each index with the node 'tag' currently being processed
+// before advancing up the dominator chain using idom().
+// Later calls that find a match to 'tag' know that this path has already
+// been considered in the current LCA (which is input 'n1' by convention).
+// Since get_late_ctrl() is only called once for each node, the tag array
+// does not need to be cleared between calls to get_late_ctrl().
+// Algorithm trades a larger constant factor for better asymptotic behavior
+//
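+// For example, a sketch of the calling pattern used by get_late_ctrl()
+// above (ctrl_of_use_1/2 are placeholders for the uses' controls):
+//
+//   LCA = dom_lca_for_get_late_ctrl( NULL, ctrl_of_use_1, n ); // tags its path
+//   LCA = dom_lca_for_get_late_ctrl( LCA,  ctrl_of_use_2, n ); // reuses the tags
+//
+// The second call returns the running LCA as soon as it climbs onto a node
+// already tagged with n, so the part of the dominator chain shared with the
+// first use is not walked again.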
+Node *PhaseIdealLoop::dom_lca_for_get_late_ctrl_internal( Node *n1, Node *n2, Node *tag ) {
+ uint d1 = dom_depth(n1);
+ uint d2 = dom_depth(n2);
+
+ do {
+ if (d1 > d2) {
+ // current lca is deeper than n2
+ _dom_lca_tags.map(n1->_idx, tag);
+ n1 = idom(n1);
+ d1 = dom_depth(n1);
+ } else if (d1 < d2) {
+ // n2 is deeper than current lca
+ Node *memo = _dom_lca_tags[n2->_idx];
+ if( memo == tag ) {
+ return n1; // Return the current LCA
+ }
+ _dom_lca_tags.map(n2->_idx, tag);
+ n2 = idom(n2);
+ d2 = dom_depth(n2);
+ } else {
+ // Here d1 == d2. Due to edits of the dominator-tree, sections
+ // of the tree might have the same depth. These sections have
+ // to be searched more carefully.
+
+ // Scan up all the n1's with equal depth, looking for n2.
+ _dom_lca_tags.map(n1->_idx, tag);
+ Node *t1 = idom(n1);
+ while (dom_depth(t1) == d1) {
+ if (t1 == n2) return n2;
+ _dom_lca_tags.map(t1->_idx, tag);
+ t1 = idom(t1);
+ }
+ // Scan up all the n2's with equal depth, looking for n1.
+ _dom_lca_tags.map(n2->_idx, tag);
+ Node *t2 = idom(n2);
+ while (dom_depth(t2) == d2) {
+ if (t2 == n1) return n1;
+ _dom_lca_tags.map(t2->_idx, tag);
+ t2 = idom(t2);
+ }
+ // Move up to a new dominator-depth value as well as up the dom-tree.
+ n1 = t1;
+ n2 = t2;
+ d1 = dom_depth(n1);
+ d2 = dom_depth(n2);
+ }
+ } while (n1 != n2);
+ return n1;
+}
+
+//------------------------------init_dom_lca_tags------------------------------
+// Tag could be a node's integer index, 32 bits instead of 64 bits in some cases.
+// Intended use does not involve any growth for the array, so it could
+// be of fixed size.
+void PhaseIdealLoop::init_dom_lca_tags() {
+ uint limit = C->unique() + 1;
+ _dom_lca_tags.map( limit, NULL );
+#ifdef ASSERT
+ for( uint i = 0; i < limit; ++i ) {
+ assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
+ }
+#endif // ASSERT
+}
+
+//------------------------------clear_dom_lca_tags------------------------------
+// Tag could be a node's integer index, 32 bits instead of 64 bits in some cases.
+// Intended use does not involve any growth for the array, so it could
+// be of fixed size.
+void PhaseIdealLoop::clear_dom_lca_tags() {
+ uint limit = C->unique() + 1;
+ _dom_lca_tags.map( limit, NULL );
+ _dom_lca_tags.clear();
+#ifdef ASSERT
+ for( uint i = 0; i < limit; ++i ) {
+ assert(_dom_lca_tags[i] == NULL, "Must be distinct from each node pointer");
+ }
+#endif // ASSERT
+}
+
+//------------------------------build_loop_late--------------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// Second pass finds latest legal placement, and ideal loop placement.
+void PhaseIdealLoop::build_loop_late( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me ) {
+ while (worklist.size() != 0) {
+ Node *n = worklist.pop();
+ // Only visit once
+ if (visited.test_set(n->_idx)) continue;
+ uint cnt = n->outcnt();
+ uint i = 0;
+ while (true) {
+ assert( _nodes[n->_idx], "no dead nodes" );
+ // Visit all children
+ if (i < cnt) {
+ Node* use = n->raw_out(i);
+ ++i;
+ // Check for dead uses. Aggressively prune such junk. It might be
+ // dead in the global sense, but still have local uses so I cannot
+ // easily call 'remove_dead_node'.
+ if( _nodes[use->_idx] != NULL || use->is_top() ) { // Not dead?
+ // Due to cycles, we might not hit the same fixed point in the verify
+ // pass as we do in the regular pass. Instead, visit such phis as
+ // simple uses of the loop head.
+ if( use->in(0) && (use->is_CFG() || use->is_Phi()) ) {
+ if( !visited.test(use->_idx) )
+ worklist.push(use);
+ } else if( !visited.test_set(use->_idx) ) {
+ nstack.push(n, i); // Save parent and next use's index.
+ n = use; // Process all children of current use.
+ cnt = use->outcnt();
+ i = 0;
+ }
+ } else {
+ // Do not visit around the backedge of loops via data edges.
+ // push dead code onto a worklist
+ _deadlist.push(use);
+ }
+ } else {
+ // All of n's children have been processed, complete post-processing.
+ build_loop_late_post(n, verify_me);
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ // Process next node on the worklist.
+ break;
+ }
+ // Get saved parent node and next use's index. Visit the rest of uses.
+ n = nstack.node();
+ cnt = n->outcnt();
+ i = nstack.index();
+ nstack.pop();
+ }
+ }
+ }
+}
+
+//------------------------------build_loop_late_post---------------------------
+// Put Data nodes into some loop nest, by setting the _nodes[]->loop mapping.
+// Second pass finds latest legal placement, and ideal loop placement.
+void PhaseIdealLoop::build_loop_late_post( Node *n, const PhaseIdealLoop *verify_me ) {
+
+ if (n->req() == 2 && n->Opcode() == Op_ConvI2L && !C->major_progress()) {
+ _igvn._worklist.push(n); // Maybe we'll normalize it, if no more loops.
+ }
+
+ // CFG and pinned nodes already handled
+ if( n->in(0) ) {
+ if( n->in(0)->is_top() ) return; // Dead?
+
+ // We'd like +VerifyLoopOptimizations to not believe that Mod's/Loads
+ // _must_ be pinned (they have to observe their control edge of course).
+ // Unlike Stores (which modify an unallocable resource, the memory
+ // state), Mods/Loads can float around. So free them up.
+ bool pinned = true;
+ switch( n->Opcode() ) {
+ case Op_DivI:
+ case Op_DivF:
+ case Op_DivD:
+ case Op_ModI:
+ case Op_ModF:
+ case Op_ModD:
+ case Op_LoadB: // Same with Loads; they can sink
+ case Op_LoadC: // during loop optimizations.
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadS:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ case Op_StrComp: // Does a bunch of load-like effects
+ pinned = false;
+ }
+ if( pinned ) {
+ IdealLoopTree *choosen_loop = get_loop(n->is_CFG() ? n : get_ctrl(n));
+ if( !choosen_loop->_child ) // Inner loop?
+ choosen_loop->_body.push(n); // Collect inner loops
+ return;
+ }
+ } else { // No slot zero
+ if( n->is_CFG() ) { // CFG with no slot 0 is dead
+ _nodes.map(n->_idx,0); // No block setting, it's globally dead
+ return;
+ }
+ assert(!n->is_CFG() || n->outcnt() == 0, "");
+ }
+
+ // Do I have a "safe range" I can select over?
+ Node *early = get_ctrl(n);// Early location already computed
+
+ // Compute latest point this Node can go
+ Node *LCA = get_late_ctrl( n, early );
+ // LCA is NULL due to uses being dead
+ if( LCA == NULL ) {
+#ifdef ASSERT
+ for (DUIterator i1 = n->outs(); n->has_out(i1); i1++) {
+ assert( _nodes[n->out(i1)->_idx] == NULL, "all uses must also be dead");
+ }
+#endif
+ _nodes.map(n->_idx, 0); // This node is useless
+ _deadlist.push(n);
+ return;
+ }
+ assert(LCA != NULL && !LCA->is_top(), "no dead nodes");
+
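+ // Between the latest legal position (LCA) and the earliest one, prefer the
+ // dominator with the shallowest loop nest. For example (sketch): if the LCA
+ // sits inside a doubly nested loop but some dominator between it and 'early'
+ // is only singly nested, that dominator wins, hoisting n out of the inner
+ // loop.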
+ Node *legal = LCA; // Walk 'legal' up the IDOM chain
+ Node *least = legal; // Best legal position so far
+ while( early != legal ) { // While not at earliest legal
+ // Find least loop nesting depth
+ legal = idom(legal); // Bump up the IDOM tree
+ // Check for lower nesting depth
+ if( get_loop(legal)->_nest < get_loop(least)->_nest )
+ least = legal;
+ }
+
+ // Try not to place code on a loop entry projection
+ // which can inhibit range check elimination.
+ if (least != early) {
+ Node* ctrl_out = least->unique_ctrl_out();
+ if (ctrl_out && ctrl_out->is_CountedLoop() &&
+ least == ctrl_out->in(LoopNode::EntryControl)) {
+ Node* least_dom = idom(least);
+ if (get_loop(least_dom)->is_member(get_loop(least))) {
+ least = least_dom;
+ }
+ }
+ }
+
+#ifdef ASSERT
+ // If verifying, verify that 'verify_me' has a legal location
+ // and choose it as our location.
+ if( verify_me ) {
+ Node *v_ctrl = verify_me->get_ctrl_no_update(n);
+ Node *legal = LCA;
+ while( early != legal ) { // While not at earliest legal
+ if( legal == v_ctrl ) break; // Check for prior good location
+ legal = idom(legal); // Bump up the IDOM tree
+ }
+ // Check for prior good location
+ if( legal == v_ctrl ) least = legal; // Keep prior if found
+ }
+#endif
+
+ // Assign discovered "here or above" point
+ least = find_non_split_ctrl(least);
+ set_ctrl(n, least);
+
+ // Collect inner loop bodies
+ IdealLoopTree *choosen_loop = get_loop(least);
+ if( !choosen_loop->_child ) // Inner loop?
+ choosen_loop->_body.push(n);// Collect inner loops
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+void PhaseIdealLoop::dump( ) const {
+ ResourceMark rm;
+ Arena* arena = Thread::current()->resource_area();
+ Node_Stack stack(arena, C->unique() >> 2);
+ Node_List rpo_list;
+ VectorSet visited(arena);
+ visited.set(C->top()->_idx);
+ rpo( C->root(), stack, visited, rpo_list );
+ // Dump root loop indexed by last element in PO order
+ dump( _ltree_root, rpo_list.size(), rpo_list );
+}
+
+void PhaseIdealLoop::dump( IdealLoopTree *loop, uint idx, Node_List &rpo_list ) const {
+
+ // Indent by loop nesting depth
+ for( uint x = 0; x < loop->_nest; x++ )
+ tty->print(" ");
+ tty->print_cr("---- Loop N%d-N%d ----", loop->_head->_idx,loop->_tail->_idx);
+
+ // Now scan for CFG nodes in the same loop
+ for( uint j=idx; j > 0; j-- ) {
+ Node *n = rpo_list[j-1];
+ if( !_nodes[n->_idx] ) // Skip dead nodes
+ continue;
+ if( get_loop(n) != loop ) { // Wrong loop nest
+ if( get_loop(n)->_head == n && // Found nested loop?
+ get_loop(n)->_parent == loop )
+ dump(get_loop(n),rpo_list.size(),rpo_list); // Print it nested-ly
+ continue;
+ }
+
+ // Dump controlling node
+ for( uint x = 0; x < loop->_nest; x++ )
+ tty->print(" ");
+ tty->print("C");
+ if( n == C->root() ) {
+ n->dump();
+ } else {
+ Node* cached_idom = idom_no_update(n);
+ Node *computed_idom = n->in(0);
+ if( n->is_Region() ) {
+ computed_idom = compute_idom(n);
+ // computed_idom() will return n->in(0) when idom(n) is an IfNode (or
+ // any MultiBranch ctrl node), so apply a similar transform to
+ // the cached idom returned from idom_no_update.
+ cached_idom = find_non_split_ctrl(cached_idom);
+ }
+ tty->print(" ID:%d",computed_idom->_idx);
+ n->dump();
+ if( cached_idom != computed_idom ) {
+ tty->print_cr("*** BROKEN IDOM! Computed as: %d, cached as: %d",
+ computed_idom->_idx, cached_idom->_idx);
+ }
+ }
+ // Dump nodes it controls
+ for( uint k = 0; k < _nodes.Size(); k++ ) {
+ // (k < C->unique() && get_ctrl(find(k)) == n)
+ if (k < C->unique() && _nodes[k] == (Node*)((intptr_t)n + 1)) {
+ Node *m = C->root()->find(k);
+ if( m && m->outcnt() > 0 ) {
+ if (!(has_ctrl(m) && get_ctrl_no_update(m) == n)) {
+ tty->print_cr("*** BROKEN CTRL ACCESSOR! _nodes[k] is %p, ctrl is %p",
+ _nodes[k], has_ctrl(m) ? get_ctrl_no_update(m) : NULL);
+ }
+ for( uint j = 0; j < loop->_nest; j++ )
+ tty->print(" ");
+ tty->print(" ");
+ m->dump();
+ }
+ }
+ }
+ }
+}
+
+// Collect an R-P-O for the whole CFG.
+// Result list is in post-order (scan backwards for RPO)
+void PhaseIdealLoop::rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const {
+ stk.push(start, 0);
+ visited.set(start->_idx);
+
+ while (stk.is_nonempty()) {
+ Node* m = stk.node();
+ uint idx = stk.index();
+ if (idx < m->outcnt()) {
+ stk.set_index(idx + 1);
+ Node* n = m->raw_out(idx);
+ if (n->is_CFG() && !visited.test_set(n->_idx)) {
+ stk.push(n, 0);
+ }
+ } else {
+ rpo_list.push(m);
+ stk.pop();
+ }
+ }
+}
+#endif
+
+
+//=============================================================================
+//------------------------------LoopTreeIterator-----------------------------------
+
+// Advance to next loop tree using a preorder, left-to-right traversal.
+void LoopTreeIterator::next() {
+ assert(!done(), "must not be done.");
+ if (_curnt->_child != NULL) {
+ _curnt = _curnt->_child;
+ } else if (_curnt->_next != NULL) {
+ _curnt = _curnt->_next;
+ } else {
+ while (_curnt != _root && _curnt->_next == NULL) {
+ _curnt = _curnt->_parent;
+ }
+ if (_curnt == _root) {
+ _curnt = NULL;
+ assert(done(), "must be done.");
+ } else {
+ assert(_curnt->_next != NULL, "must be more to do");
+ _curnt = _curnt->_next;
+ }
+ }
+}
diff --git a/src/share/vm/opto/loopnode.hpp b/src/share/vm/opto/loopnode.hpp
new file mode 100644
index 000000000..21ddf8015
--- /dev/null
+++ b/src/share/vm/opto/loopnode.hpp
@@ -0,0 +1,919 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class CmpNode;
+class CountedLoopEndNode;
+class CountedLoopNode;
+class IdealLoopTree;
+class LoopNode;
+class Node;
+class PhaseIdealLoop;
+class VectorSet;
+struct small_cache;
+
+//
+// I D E A L I Z E D L O O P S
+//
+// Idealized loops are the set of loops I perform more interesting
+// transformations on, beyond simple hoisting.
+
+//------------------------------LoopNode---------------------------------------
+// Simple loop header. Fall in path on left, loop-back path on right.
+class LoopNode : public RegionNode {
+ // Size is bigger to hold the flags. However, the flags do not change
+ // the semantics, so they do not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+protected:
+ short _loop_flags;
+ // Names for flag bitfields
+ enum { pre_post_main=0, inner_loop=8, partial_peel_loop=16, partial_peel_failed=32 };
+ char _unswitch_count;
+ enum { _unswitch_max=3 };
+
+public:
+ // Names for edge indices
+ enum { Self=0, EntryControl, LoopBackControl };
+
+ int is_inner_loop() const { return _loop_flags & inner_loop; }
+ void set_inner_loop() { _loop_flags |= inner_loop; }
+
+ int is_partial_peel_loop() const { return _loop_flags & partial_peel_loop; }
+ void set_partial_peel_loop() { _loop_flags |= partial_peel_loop; }
+ int partial_peel_has_failed() const { return _loop_flags & partial_peel_failed; }
+ void mark_partial_peel_failed() { _loop_flags |= partial_peel_failed; }
+
+ int unswitch_max() { return _unswitch_max; }
+ int unswitch_count() { return _unswitch_count; }
+ void set_unswitch_count(int val) {
+ assert (val <= unswitch_max(), "too many unswitches");
+ _unswitch_count = val;
+ }
+
+ LoopNode( Node *entry, Node *backedge ) : RegionNode(3), _loop_flags(0), _unswitch_count(0) {
+ init_class_id(Class_Loop);
+ init_req(EntryControl, entry);
+ init_req(LoopBackControl, backedge);
+ }
+
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int Opcode() const;
+ bool can_be_counted_loop(PhaseTransform* phase) const {
+ return req() == 3 && in(0) != NULL &&
+ in(1) != NULL && phase->type(in(1)) != Type::TOP &&
+ in(2) != NULL && phase->type(in(2)) != Type::TOP;
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------Counted Loops----------------------------------
+// Counted loops are all trip-counted loops, with exactly 1 trip-counter exit
+// path (and maybe some other exit paths). The trip-counter exit is always
+// last in the loop. The trip-counter does not have to stride by a constant,
+// but it does have to stride by a loop-invariant amount; the exit value is
+// also loop invariant.
+
+// CountedLoopNodes and CountedLoopEndNodes come in matched pairs. The
+// CountedLoopNode has the incoming loop control and the loop-back-control
+// which is always the IfTrue before the matching CountedLoopEndNode. The
+// CountedLoopEndNode has an incoming control (possibly not the
+// CountedLoopNode if there is control flow in the loop), the post-increment
+// trip-counter value, and the limit. The trip-counter value is always of
+// the form (Op old-trip-counter stride). The old-trip-counter is produced
+// by a Phi connected to the CountedLoopNode. The stride is loop invariant.
+// The Op is any commutative opcode, including Add, Mul, Xor. The
+// CountedLoopEndNode also takes in the loop-invariant limit value.
+
+// From a CountedLoopNode I can reach the matching CountedLoopEndNode via the
+// loop-back control. From CountedLoopEndNodes I can reach CountedLoopNodes
+// via the old-trip-counter from the Op node.
+
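+// For example (an illustrative source-level shape only):
+//
+//   for (int i = init; i < limit; i += stride) { ... }
+//
+// maps onto a CountedLoopNode whose trip-counter Phi merges 'init' with the
+// post-incremented value (e.g. an AddI of the Phi and the stride), and a
+// CountedLoopEndNode that tests that post-incremented value against the
+// loop-invariant 'limit'.
+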
+//------------------------------CountedLoopNode--------------------------------
+// CountedLoopNodes head simple counted loops. CountedLoopNodes have as
+// inputs the incoming loop-start control and the loop-back control, so they
+// act like RegionNodes. They also take in the initial trip counter, the
+// loop-invariant stride and the loop-invariant limit value. CountedLoopNodes
+// produce a loop-body control and the trip counter value. Since
+// CountedLoopNodes behave like RegionNodes I still have a standard CFG model.
+
+class CountedLoopNode : public LoopNode {
+ // Size is bigger to hold _main_idx. However, _main_idx does not change
+ // the semantics so it does not appear in the hash & cmp functions.
+ virtual uint size_of() const { return sizeof(*this); }
+
+ // For Pre- and Post-loops during debugging ONLY, this holds the index of
+ // the Main CountedLoop. Used to assert that we understand the graph shape.
+ node_idx_t _main_idx;
+
+ // Known trip count calculated by policy_maximally_unroll
+ int _trip_count;
+
+ // Expected trip count from profile data
+ float _profile_trip_cnt;
+
+ // Log2 of original loop bodies in unrolled loop
+ int _unrolled_count_log2;
+
+ // Node count prior to last unrolling - used to decide if
+ // unroll,optimize,unroll,optimize,... is making progress
+ int _node_count_before_unroll;
+
+public:
+ CountedLoopNode( Node *entry, Node *backedge )
+ : LoopNode(entry, backedge), _trip_count(max_jint),
+ _profile_trip_cnt(COUNT_UNKNOWN), _unrolled_count_log2(0),
+ _node_count_before_unroll(0) {
+ init_class_id(Class_CountedLoop);
+ // Initialize _trip_count to the largest possible value.
+ // Will be reset (lower) if the loop's trip count is known.
+ }
+
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ Node *init_control() const { return in(EntryControl); }
+ Node *back_control() const { return in(LoopBackControl); }
+ CountedLoopEndNode *loopexit() const;
+ Node *init_trip() const;
+ Node *stride() const;
+ int stride_con() const;
+ bool stride_is_con() const;
+ Node *limit() const;
+ Node *incr() const;
+ Node *phi() const;
+
+ // Match increment with optional truncation
+ static Node* match_incr_with_optional_truncation(Node* expr, Node** trunc1, Node** trunc2, const TypeInt** trunc_type);
+
+ // A 'main' loop has a pre-loop and a post-loop. The 'main' loop
+ // can run short a few iterations and may start a few iterations in.
+ // It will be RCE'd and unrolled and aligned.
+
+ // A following 'post' loop will run any remaining iterations. Used
+ // during Range Check Elimination, the 'post' loop will do any final
+ // iterations with full checks. Also used by Loop Unrolling, where
+ // the 'post' loop will do any epilog iterations needed. Basically,
+ // a 'post' loop can not profitably be further unrolled or RCE'd.
+
+ // A preceding 'pre' loop will run at least 1 iteration (to do peeling),
+ // it may do under-flow checks for RCE and may do alignment iterations
+ // so the following main loop 'knows' that it is striding down cache
+ // lines.
+
+ // A 'main' loop that is ONLY unrolled or peeled, never RCE'd or
+ // Aligned, may be missing it's pre-loop.
+ enum { Normal=0, Pre=1, Main=2, Post=3, PrePostFlagsMask=3, Main_Has_No_Pre_Loop=4 };
+ int is_normal_loop() const { return (_loop_flags&PrePostFlagsMask) == Normal; }
+ int is_pre_loop () const { return (_loop_flags&PrePostFlagsMask) == Pre; }
+ int is_main_loop () const { return (_loop_flags&PrePostFlagsMask) == Main; }
+ int is_post_loop () const { return (_loop_flags&PrePostFlagsMask) == Post; }
+ int is_main_no_pre_loop() const { return _loop_flags & Main_Has_No_Pre_Loop; }
+ void set_main_no_pre_loop() { _loop_flags |= Main_Has_No_Pre_Loop; }
+
+
+ void set_pre_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Pre ; _main_idx = main->_idx; }
+ void set_main_loop ( ) { assert(is_normal_loop(),""); _loop_flags |= Main; }
+ void set_post_loop (CountedLoopNode *main) { assert(is_normal_loop(),""); _loop_flags |= Post; _main_idx = main->_idx; }
+ void set_normal_loop( ) { _loop_flags &= ~PrePostFlagsMask; }
+
+ void set_trip_count(int tc) { _trip_count = tc; }
+ int trip_count() { return _trip_count; }
+
+ void set_profile_trip_cnt(float ptc) { _profile_trip_cnt = ptc; }
+ float profile_trip_cnt() { return _profile_trip_cnt; }
+
+ void double_unrolled_count() { _unrolled_count_log2++; }
+ int unrolled_count() { return 1 << MIN2(_unrolled_count_log2, BitsPerInt-3); }
+
+ void set_node_count_before_unroll(int ct) { _node_count_before_unroll = ct; }
+ int node_count_before_unroll() { return _node_count_before_unroll; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------CountedLoopEndNode-----------------------------
+// CountedLoopEndNodes end simple trip counted loops. They act much like
+// IfNodes.
+class CountedLoopEndNode : public IfNode {
+public:
+ enum { TestControl, TestValue };
+
+ CountedLoopEndNode( Node *control, Node *test, float prob, float cnt )
+ : IfNode( control, test, prob, cnt) {
+ init_class_id(Class_CountedLoopEnd);
+ }
+ virtual int Opcode() const;
+
+ Node *cmp_node() const { return (in(TestValue)->req() >=2) ? in(TestValue)->in(1) : NULL; }
+ Node *incr() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ Node *limit() const { Node *tmp = cmp_node(); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
+ Node *stride() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(2) : NULL; }
+ Node *phi() const { Node *tmp = incr (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ Node *init_trip() const { Node *tmp = phi (); return (tmp && tmp->req()==3) ? tmp->in(1) : NULL; }
+ int stride_con() const;
+ bool stride_is_con() const { Node *tmp = stride (); return (tmp != NULL && tmp->is_Con()); }
+ BoolTest::mask test_trip() const { return in(TestValue)->as_Bool()->_test._test; }
+ CountedLoopNode *loopnode() const {
+ Node *ln = phi()->in(0);
+ assert( ln->Opcode() == Op_CountedLoop, "malformed loop" );
+ return (CountedLoopNode*)ln; }
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+
+inline CountedLoopEndNode *CountedLoopNode::loopexit() const {
+ Node *bc = back_control();
+ if( bc == NULL ) return NULL;
+ Node *le = bc->in(0);
+ if( le->Opcode() != Op_CountedLoopEnd )
+ return NULL;
+ return (CountedLoopEndNode*)le;
+}
+inline Node *CountedLoopNode::init_trip() const { return loopexit() ? loopexit()->init_trip() : NULL; }
+inline Node *CountedLoopNode::stride() const { return loopexit() ? loopexit()->stride() : NULL; }
+inline int CountedLoopNode::stride_con() const { return loopexit() ? loopexit()->stride_con() : 0; }
+inline bool CountedLoopNode::stride_is_con() const { return loopexit() && loopexit()->stride_is_con(); }
+inline Node *CountedLoopNode::limit() const { return loopexit() ? loopexit()->limit() : NULL; }
+inline Node *CountedLoopNode::incr() const { return loopexit() ? loopexit()->incr() : NULL; }
+inline Node *CountedLoopNode::phi() const { return loopexit() ? loopexit()->phi() : NULL; }
+
+
+// -----------------------------IdealLoopTree----------------------------------
+class IdealLoopTree : public ResourceObj {
+public:
+ IdealLoopTree *_parent; // Parent in loop tree
+ IdealLoopTree *_next; // Next sibling in loop tree
+ IdealLoopTree *_child; // First child in loop tree
+
+ // The head-tail backedge defines the loop.
+ // If tail is NULL then this loop has multiple backedges as part of the
+ // same loop. During cleanup I'll peel off the multiple backedges; merge
+ // them at the loop bottom and flow 1 real backedge into the loop.
+ Node *_head; // Head of loop
+ Node *_tail; // Tail of loop
+ inline Node *tail(); // Handle lazy update of _tail field
+ PhaseIdealLoop* _phase;
+
+ Node_List _body; // Loop body for inner loops
+
+ uint8 _nest; // Nesting depth
+ uint8 _irreducible:1, // True if irreducible
+ _has_call:1, // True if has call safepoint
+ _has_sfpt:1, // True if has non-call safepoint
+ _rce_candidate:1; // True if candidate for range check elimination
+
+ Node_List* _required_safept; // An inner loop cannot delete these safepts.
+
+ IdealLoopTree( PhaseIdealLoop* phase, Node *head, Node *tail )
+ : _parent(0), _next(0), _child(0),
+ _head(head), _tail(tail),
+ _phase(phase),
+ _required_safept(NULL),
+ _nest(0), _irreducible(0), _has_call(0), _has_sfpt(0), _rce_candidate(0)
+ { }
+
+ // Is 'l' a member of 'this'?
+ int is_member( const IdealLoopTree *l ) const; // Test for nested membership
+
+ // Set loop nesting depth. Accumulate has_call bits.
+ int set_nest( uint depth );
+
+ // Split out multiple fall-in edges from the loop header. Move them to a
+ // private RegionNode before the loop. This becomes the loop landing pad.
+ void split_fall_in( PhaseIdealLoop *phase, int fall_in_cnt );
+
+ // Split out the outermost loop from this shared header.
+ void split_outer_loop( PhaseIdealLoop *phase );
+
+ // Merge all the backedges from the shared header into a private Region.
+ // Feed that region as the one backedge to this loop.
+ void merge_many_backedges( PhaseIdealLoop *phase );
+
+ // Split shared headers and insert loop landing pads.
+ // Insert a LoopNode to replace the RegionNode.
+ // Returns TRUE if loop tree is structurally changed.
+ bool beautify_loops( PhaseIdealLoop *phase );
+
+ // Perform iteration-splitting on inner loops. Split iterations to avoid
+ // range checks or one-shot null checks.
+ void iteration_split( PhaseIdealLoop *phase, Node_List &old_new );
+
+ // Driver for various flavors of iteration splitting
+ void iteration_split_impl( PhaseIdealLoop *phase, Node_List &old_new );
+
+ // Given dominators, try to find loops with calls that must always be
+ // executed (call dominates loop tail). These loops do not need non-call
+ // safepoints (ncsfpt).
+ void check_safepts(VectorSet &visited, Node_List &stack);
+
+ // Allpaths backwards scan from loop tail, terminating each path at first safepoint
+ // encountered.
+ void allpaths_check_safepts(VectorSet &visited, Node_List &stack);
+
+ // Convert to counted loops where possible
+ void counted_loop( PhaseIdealLoop *phase );
+
+ // Check for Node being a loop-breaking test
+ Node *is_loop_exit(Node *iff) const;
+
+ // Returns true if ctrl is executed on every complete iteration
+ bool dominates_backedge(Node* ctrl);
+
+ // Remove simplistic dead code from loop body
+ void DCE_loop_body();
+
+ // Look for loop-exit tests with my 50/50 guesses from the Parsing stage.
+ // Replace with a 1-in-10 exit guess.
+ void adjust_loop_exit_prob( PhaseIdealLoop *phase );
+
+ // Return TRUE or FALSE if the loop should never be RCE'd or aligned.
+ // Useful for unrolling loops with NO array accesses.
+ bool policy_peel_only( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be unswitched -- clone
+ // loop with an invariant test
+ bool policy_unswitching( PhaseIdealLoop *phase ) const;
+
+ // Micro-benchmark spamming. Remove empty loops.
+ bool policy_do_remove_empty_loop( PhaseIdealLoop *phase );
+
+ // Return TRUE or FALSE if the loop should be peeled or not. Peel if we can
+ // make some loop-invariant test (usually a null-check) happen before the
+ // loop.
+ bool policy_peeling( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be maximally unrolled. Stash any
+ // known trip count in the counted loop node.
+ bool policy_maximally_unroll( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be unrolled or not. Unroll if
+ // the loop is a CountedLoop and the body is small enough.
+ bool policy_unroll( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be range-check-eliminated.
+ // Gather a list of IF tests that are dominated by iteration splitting;
+ // also gather the end of the first split and the start of the 2nd split.
+ bool policy_range_check( PhaseIdealLoop *phase ) const;
+
+ // Return TRUE or FALSE if the loop should be cache-line aligned.
+ // Gather the expression that does the alignment. Note that only
+ // one array base can be aligned in a loop (unless the VM guarantees
+ // mutual alignment). Note that if we vectorize short memory ops
+ // into longer memory ops, we may want to increase alignment.
+ bool policy_align( PhaseIdealLoop *phase ) const;
+
+ // Compute loop trip count from profile data
+ void compute_profile_trip_cnt( PhaseIdealLoop *phase );
+
+ // Reassociate invariant expressions.
+ void reassociate_invariants(PhaseIdealLoop *phase);
+ // Reassociate invariant add and subtract expressions.
+ Node* reassociate_add_sub(Node* n1, PhaseIdealLoop *phase);
+ // Return nonzero index of invariant operand if invariant and variant
+ // are combined with an Add or Sub. Helper for reassociate_invariants.
+ int is_invariant_addition(Node* n, PhaseIdealLoop *phase);
+
+ // Return true if n is invariant
+ bool is_invariant(Node* n) const;
+
+ // Put loop body on igvn work list
+ void record_for_igvn();
+
+ bool is_loop() { return !_irreducible && _tail && !_tail->is_top(); }
+ bool is_inner() { return is_loop() && _child == NULL; }
+ bool is_counted() { return is_loop() && _head != NULL && _head->is_CountedLoop(); }
+
+#ifndef PRODUCT
+ void dump_head( ) const; // Dump loop head only
+ void dump() const; // Dump this loop recursively
+ void verify_tree(IdealLoopTree *loop, const IdealLoopTree *parent) const;
+#endif
+
+};
+
+// -----------------------------PhaseIdealLoop---------------------------------
+// Computes the mapping from Nodes to IdealLoopTrees. Organizes IdealLoopTrees into a
+// loop tree. Drives the loop-based transformations on the ideal graph.
+class PhaseIdealLoop : public PhaseTransform {
+ friend class IdealLoopTree;
+ friend class SuperWord;
+ // Pre-computed def-use info
+ PhaseIterGVN &_igvn;
+
+ // Head of loop tree
+ IdealLoopTree *_ltree_root;
+
+ // Array of pre-order numbers, plus post-visited bit.
+ // ZERO for not pre-visited. EVEN for pre-visited but not post-visited.
+ // ODD for post-visited. Other bits are the pre-order number.
+ uint *_preorders;
+ uint _max_preorder;
+
+ // Allocate _preorders[] array
+ void allocate_preorders() {
+ _max_preorder = C->unique()+8;
+ _preorders = NEW_RESOURCE_ARRAY(uint, _max_preorder);
+ memset(_preorders, 0, sizeof(uint) * _max_preorder);
+ }
+
+ // Reallocate the _preorders[] array to fit the current node count
+ void reallocate_preorders() {
+ if ( _max_preorder < C->unique() ) {
+ _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, C->unique());
+ _max_preorder = C->unique();
+ }
+ memset(_preorders, 0, sizeof(uint) * _max_preorder);
+ }
+
+ // Check to grow _preorders[] array for the case when build_loop_tree_impl()
+ // adds new nodes.
+ void check_grow_preorders( ) {
+ if ( _max_preorder < C->unique() ) {
+ uint newsize = _max_preorder<<1; // double size of array
+ _preorders = REALLOC_RESOURCE_ARRAY(uint, _preorders, _max_preorder, newsize);
+ memset(&_preorders[_max_preorder],0,sizeof(uint)*(newsize-_max_preorder));
+ _max_preorder = newsize;
+ }
+ }
+ // Check for pre-visited. Zero for NOT visited; non-zero for visited.
+ int is_visited( Node *n ) const { return _preorders[n->_idx]; }
+ // Pre-order numbers are written to the Nodes array as low-bit-set values.
+ void set_preorder_visited( Node *n, int pre_order ) {
+ assert( !is_visited( n ), "already set" );
+ _preorders[n->_idx] = (pre_order<<1);
+ };
+ // Return pre-order number.
+ int get_preorder( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]>>1; }
+
+ // Check for being post-visited.
+ // Should be previsited already (checked with assert(is_visited(n))).
+ int is_postvisited( Node *n ) const { assert( is_visited(n), "" ); return _preorders[n->_idx]&1; }
+
+ // Mark as post visited
+ void set_postvisited( Node *n ) { assert( !is_postvisited( n ), "" ); _preorders[n->_idx] |= 1; }
+
+ // Set/get control node out. Set lower bit to distinguish from IdealLoopTree
+ // Returns true if "n" is a data node, false if it's a control node.
+ bool has_ctrl( Node *n ) const { return ((intptr_t)_nodes[n->_idx]) & 1; }
+
+ // clear out dead code after build_loop_late
+ Node_List _deadlist;
+
+ // Support for faster execution of get_late_ctrl()/dom_lca()
+ // when a node has many uses and dominator depth is deep.
+ Node_Array _dom_lca_tags;
+ void init_dom_lca_tags();
+ void clear_dom_lca_tags();
+ // Inline wrapper for frequent cases:
+ // 1) only one use
+ // 2) a use is the same as the current LCA passed as 'n1'
+ Node *dom_lca_for_get_late_ctrl( Node *lca, Node *n, Node *tag ) {
+ assert( n->is_CFG(), "" );
+ // Fast-path NULL lca
+ if( lca != NULL && lca != n ) {
+ assert( lca->is_CFG(), "" );
+ // find LCA of all uses
+ n = dom_lca_for_get_late_ctrl_internal( lca, n, tag );
+ }
+ return find_non_split_ctrl(n);
+ }
+ Node *dom_lca_for_get_late_ctrl_internal( Node *lca, Node *n, Node *tag );
+ // true if CFG node d dominates CFG node n
+ bool is_dominator(Node *d, Node *n);
+
+ // Helper function for directing control inputs away from CFG split
+ // points.
+ Node *find_non_split_ctrl( Node *ctrl ) const {
+ if (ctrl != NULL) {
+ if (ctrl->is_MultiBranch()) {
+ ctrl = ctrl->in(0);
+ }
+ assert(ctrl->is_CFG(), "CFG");
+ }
+ return ctrl;
+ }
+
+public:
+ bool has_node( Node* n ) const { return _nodes[n->_idx] != NULL; }
+ // check if transform created new nodes that need _ctrl recorded
+ Node *get_late_ctrl( Node *n, Node *early );
+ Node *get_early_ctrl( Node *n );
+ void set_early_ctrl( Node *n );
+ void set_subtree_ctrl( Node *root );
+ void set_ctrl( Node *n, Node *ctrl ) {
+ assert( !has_node(n) || has_ctrl(n), "" );
+ assert( ctrl->in(0), "cannot set dead control node" );
+ assert( ctrl == find_non_split_ctrl(ctrl), "must set legal ctrl" );
+ _nodes.map( n->_idx, (Node*)((intptr_t)ctrl + 1) );
+ }
+ // Set control and update loop membership
+ void set_ctrl_and_loop(Node* n, Node* ctrl) {
+ IdealLoopTree* old_loop = get_loop(get_ctrl(n));
+ IdealLoopTree* new_loop = get_loop(ctrl);
+ if (old_loop != new_loop) {
+ if (old_loop->_child == NULL) old_loop->_body.yank(n);
+ if (new_loop->_child == NULL) new_loop->_body.push(n);
+ }
+ set_ctrl(n, ctrl);
+ }
+ // Control nodes can be replaced or subsumed. During this pass they
+ // get their replacement Node in slot 1. Instead of updating the block
+ // location of all Nodes in the subsumed block, we lazily do it. As we
+ // pull such a subsumed block out of the array, we write back the final
+ // correct block.
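+ // For example (sketch): if control node B1 was subsumed by B2 and B2 in turn
+ // by B3, the side array holds the chain B1 -> B2 -> B3; get_ctrl() follows
+ // the chain once, writes B3 back for the queried node, and later lookups
+ // stop after a single hop.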
+ Node *get_ctrl( Node *i ) {
+ assert(has_node(i), "");
+ Node *n = get_ctrl_no_update(i);
+ _nodes.map( i->_idx, (Node*)((intptr_t)n + 1) );
+ assert(has_node(i) && has_ctrl(i), "");
+ assert(n == find_non_split_ctrl(n), "must return legal ctrl" );
+ return n;
+ }
+
+private:
+ Node *get_ctrl_no_update( Node *i ) const {
+ assert( has_ctrl(i), "" );
+ Node *n = (Node*)(((intptr_t)_nodes[i->_idx]) & ~1);
+ if (!n->in(0)) {
+ // Skip dead CFG nodes
+ do {
+ n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
+ } while (!n->in(0));
+ n = find_non_split_ctrl(n);
+ }
+ return n;
+ }
+
+ // Check for loop being set
+ // "n" must be a control node. Returns true if "n" is known to be in a loop.
+ bool has_loop( Node *n ) const {
+ assert(!has_node(n) || !has_ctrl(n), "");
+ return has_node(n);
+ }
+ // Set loop
+ void set_loop( Node *n, IdealLoopTree *loop ) {
+ _nodes.map(n->_idx, (Node*)loop);
+ }
+ // Lazy-dazy update of 'get_ctrl' and 'idom_at' mechanisms. Replace
+ // the 'old_node' with 'new_node'. Kill old-node. Add a reference
+ // from old_node to new_node to support the lazy update. Reference
+ // replaces the loop reference, since that is not needed for a dead node.
+public:
+ void lazy_update( Node *old_node, Node *new_node ) {
+ assert( old_node != new_node, "no cycles please" );
+ //old_node->set_req( 1, new_node /*NO DU INFO*/ );
+ // Nodes always have DU info now, so re-use the side array slot
+ // for this node to provide the forwarding pointer.
+ _nodes.map( old_node->_idx, (Node*)((intptr_t)new_node + 1) );
+ }
+ void lazy_replace( Node *old_node, Node *new_node ) {
+ _igvn.hash_delete(old_node);
+ _igvn.subsume_node( old_node, new_node );
+ lazy_update( old_node, new_node );
+ }
+ void lazy_replace_proj( Node *old_node, Node *new_node ) {
+ assert( old_node->req() == 1, "use this for Projs" );
+ _igvn.hash_delete(old_node); // Must hash-delete before hacking edges
+ old_node->add_req( NULL );
+ lazy_replace( old_node, new_node );
+ }
+
+private:
+
+ // Place 'n' in some loop nest, where 'n' is a CFG node
+ void build_loop_tree();
+ int build_loop_tree_impl( Node *n, int pre_order );
+ // Insert loop into the existing loop tree. 'innermost' is a leaf of the
+ // loop tree, not the root.
+ IdealLoopTree *sort( IdealLoopTree *loop, IdealLoopTree *innermost );
+
+ // Place Data nodes in some loop nest
+ void build_loop_early( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
+ void build_loop_late ( VectorSet &visited, Node_List &worklist, Node_Stack &nstack, const PhaseIdealLoop *verify_me );
+ void build_loop_late_post ( Node* n, const PhaseIdealLoop *verify_me );
+
+ // Array of immediate dominance info for each CFG node indexed by node idx
+private:
+ uint _idom_size;
+ Node **_idom; // Array of immediate dominators
+ uint *_dom_depth; // Used for fast LCA test
+ GrowableArray<uint>* _dom_stk; // For recomputation of dom depth
+
+ Node* idom_no_update(Node* d) const {
+ assert(d->_idx < _idom_size, "oob");
+ Node* n = _idom[d->_idx];
+ assert(n != NULL,"Bad immediate dominator info.");
+ while (n->in(0) == NULL) { // Skip dead CFG nodes
+ //n = n->in(1);
+ n = (Node*)(((intptr_t)_nodes[n->_idx]) & ~1);
+ assert(n != NULL,"Bad immediate dominator info.");
+ }
+ return n;
+ }
+ Node *idom(Node* d) const {
+ uint didx = d->_idx;
+ Node *n = idom_no_update(d);
+ _idom[didx] = n; // Lazily remove dead CFG nodes from table.
+ return n;
+ }
+ uint dom_depth(Node* d) const {
+ assert(d->_idx < _idom_size, "");
+ return _dom_depth[d->_idx];
+ }
+ void set_idom(Node* d, Node* n, uint dom_depth);
+ // Locally compute IDOM using dom_lca call
+ Node *compute_idom( Node *region ) const;
+ // Recompute dom_depth
+ void recompute_dom_depth();
+
+ // Is safept not required by an outer loop?
+ bool is_deleteable_safept(Node* sfpt);
+
+public:
+ // Dominators for the sea of nodes
+ void Dominators();
+ Node *dom_lca( Node *n1, Node *n2 ) const {
+ return find_non_split_ctrl(dom_lca_internal(n1, n2));
+ }
+ Node *dom_lca_internal( Node *n1, Node *n2 ) const;
+
+ // Compute the Ideal Node to Loop mapping
+ PhaseIdealLoop( PhaseIterGVN &igvn, const PhaseIdealLoop *verify_me, bool do_split_ifs );
+
+ // True if the method has at least 1 irreducible loop
+ bool _has_irreducible_loops;
+
+ // Per-Node transform
+ virtual Node *transform( Node *a_node ) { return 0; }
+
+ Node *is_counted_loop( Node *x, IdealLoopTree *loop );
+
+ // Return a post-walked LoopNode
+ IdealLoopTree *get_loop( Node *n ) const {
+ // Dead nodes have no loop, so return the top level loop instead
+ if (!has_node(n)) return _ltree_root;
+ assert(!has_ctrl(n), "");
+ return (IdealLoopTree*)_nodes[n->_idx];
+ }
+
+ // Is 'n' a (nested) member of 'loop'?
+ int is_member( const IdealLoopTree *loop, Node *n ) const {
+ return loop->is_member(get_loop(n)); }
+
+ // This is the basic building block of the loop optimizations. It clones an
+ // entire loop body. It makes an old_new loop body mapping; with this
+ // mapping you can find the new-loop equivalent to an old-loop node. All
+ // new-loop nodes are exactly equal to their old-loop counterparts, all
+ // edges are the same. All exits from the old-loop now have a RegionNode
+ // that merges the equivalent new-loop path. This is true even for the
+ // normal "loop-exit" condition. All uses of loop-invariant old-loop values
+ // now come from (one or more) Phis that merge their new-loop equivalents.
+ // Parameter side_by_side_idom:
+ // When side_by_side_idom is NULL, the dominator tree is constructed for
+ // the clone loop to dominate the original. Used in construction of
+ // pre-main-post loop sequence.
+ // When nonnull, the clone and original are side-by-side, both are
+ // dominated by the passed in side_by_side_idom node. Used in
+ // construction of unswitched loops.
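+ // A sketch of the two calling modes ('dd' and 'new_iff' are placeholder
+ // arguments, not names used elsewhere here):
+ //   clone_loop( loop, old_new, dd ); // clone dominates the original
+ //   clone_loop( loop, old_new, dd, new_iff ); // clone and original side by
+ //                                             // side under new_iff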
+ void clone_loop( IdealLoopTree *loop, Node_List &old_new, int dom_depth,
+ Node* side_by_side_idom = NULL);
+
+ // If we got the effect of peeling, either by actually peeling or by
+ // making a pre-loop which must execute at least once, we can remove
+ // all loop-invariant dominated tests in the main body.
+ void peeled_dom_test_elim( IdealLoopTree *loop, Node_List &old_new );
+
+ // Generate code to do a loop peel for the given loop (and body).
+ // old_new is a temp array.
+ void do_peeling( IdealLoopTree *loop, Node_List &old_new );
+
+ // Add pre and post loops around the given loop. These loops are used
+ // during RCE, unrolling and aligning loops.
+ void insert_pre_post_loops( IdealLoopTree *loop, Node_List &old_new, bool peel_only );
+ // If Node n lives in the back_ctrl block, we clone a private version of n
+ // in preheader_ctrl block and return that, otherwise return n.
+ Node *clone_up_backedge_goo( Node *back_ctrl, Node *preheader_ctrl, Node *n );
+
+ // Take steps to maximally unroll the loop. Peel any odd iterations, then
+ // unroll to do double iterations. The next round of major loop transforms
+ // will repeat till the doubled loop body does all remaining iterations in 1
+ // pass.
+ void do_maximally_unroll( IdealLoopTree *loop, Node_List &old_new );
+
+ // Unroll the loop body one step - make each trip do 2 iterations.
+ void do_unroll( IdealLoopTree *loop, Node_List &old_new, bool adjust_min_trip );
+
+ // Return true if exp is a constant times an induction var
+ bool is_scaled_iv(Node* exp, Node* iv, int* p_scale);
+
+ // Return true if exp is a scaled induction var plus (or minus) constant
+ bool is_scaled_iv_plus_offset(Node* exp, Node* iv, int* p_scale, Node** p_offset, int depth = 0);
+
+ // Eliminate range-checks and other trip-counter vs loop-invariant tests.
+ void do_range_check( IdealLoopTree *loop, Node_List &old_new );
+
+ // Create a slow version of the loop by cloning the loop
+ // and inserting an if to select fast-slow versions.
+ ProjNode* create_slow_version_of_loop(IdealLoopTree *loop,
+ Node_List &old_new);
+
+ // Clone loop with an invariant test (that does not exit) and
+ // insert a clone of the test that selects which version to
+ // execute.
+ void do_unswitching (IdealLoopTree *loop, Node_List &old_new);
+
+ // Find candidate "if" for unswitching
+ IfNode* find_unswitching_candidate(const IdealLoopTree *loop) const;
+
+ // Range Check Elimination uses this function!
+ // Constrain the main loop iterations so the affine function:
+ // scale_con * I + offset < limit
+ // always holds true. That is, either increase the number of iterations in
+ // the pre-loop or the post-loop until the condition holds true in the main
+ // loop. Scale_con, offset and limit are all loop invariant.
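+ // For example (an illustrative instance): a range check on a[2*i + 3]
+ // against a.length gives scale_con == 2, offset == 3 and limit == a.length;
+ // iterations are shifted into the pre- and/or post-loop until
+ // 2*I + 3 < a.length holds for every main-loop iteration I.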
+ void add_constraint( int stride_con, int scale_con, Node *offset, Node *limit, Node *pre_ctrl, Node **pre_limit, Node **main_limit );
+
+ // Partially peel loop up through last_peel node.
+ bool partial_peel( IdealLoopTree *loop, Node_List &old_new );
+
+ // Create a scheduled list of nodes control dependent on ctrl set.
+ void scheduled_nodelist( IdealLoopTree *loop, VectorSet& ctrl, Node_List &sched );
+ // Has a use in the vector set
+ bool has_use_in_set( Node* n, VectorSet& vset );
+ // Has use internal to the vector set (ie. not in a phi at the loop head)
+ bool has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop );
+ // clone "n" for uses that are outside of loop
+ void clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist );
+ // clone "n" for special uses that are in the not_peeled region
+ void clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
+ VectorSet& not_peel, Node_List& sink_list, Node_List& worklist );
+ // Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
+ void insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp );
+#ifdef ASSERT
+ // Validate the loop partition sets: peel and not_peel
+ bool is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list, VectorSet& not_peel );
+ // Ensure that uses outside of loop are of the right form
+ bool is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
+ uint orig_exit_idx, uint clone_exit_idx);
+ bool is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx);
+#endif
+
+ // Returns the nonzero constant stride if the if-node is a possible iv test (otherwise returns zero).
+ int stride_of_possible_iv( Node* iff );
+ bool is_possible_iv_test( Node* iff ) { return stride_of_possible_iv(iff) != 0; }
+ // Return the (unique) control output node that's in the loop (if it exists.)
+ Node* stay_in_loop( Node* n, IdealLoopTree *loop);
+ // Insert a signed compare loop exit cloned from an unsigned compare.
+ IfNode* insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop);
+ void remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop);
+ // Utility to register node "n" with PhaseIdealLoop
+ void register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth);
+ // Utility to create an if-projection
+ ProjNode* proj_clone(ProjNode* p, IfNode* iff);
+ // Force the iff control output to be the live_proj
+ Node* short_circuit_if(IfNode* iff, ProjNode* live_proj);
+ // Insert a region before an if projection
+ RegionNode* insert_region_before_proj(ProjNode* proj);
+ // Insert a new if before an if projection
+ ProjNode* insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj);
+
+ // Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+ // "Nearly" because all Nodes have been cloned from the original in the loop,
+ // but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+ // through the Phi recursively, and return a Bool.
+ BoolNode *clone_iff( PhiNode *phi, IdealLoopTree *loop );
+ CmpNode *clone_bool( PhiNode *phi, IdealLoopTree *loop );
+
+
+ // Rework addressing expressions to get the most loop-invariant stuff
+ // moved out. We'd like to do all associative operators, but it's especially
+ // important (common) to do address expressions.
+ Node *remix_address_expressions( Node *n );
+
+ // Attempt to use a conditional move instead of a phi/branch
+ Node *conditional_move( Node *n );
+
+ // Reorganize offset computations to lower register pressure.
+ // Mostly prevent loop-fallout uses of the pre-incremented trip counter
+ // (which are then alive with the post-incremented trip counter
+ // forcing an extra register move)
+ void reorg_offsets( IdealLoopTree *loop );
+
+ // Check for aggressive application of 'split-if' optimization,
+ // using basic block level info.
+ void split_if_with_blocks ( VectorSet &visited, Node_Stack &nstack );
+ Node *split_if_with_blocks_pre ( Node *n );
+ void split_if_with_blocks_post( Node *n );
+ Node *has_local_phi_input( Node *n );
+ // Mark an IfNode as being dominated by a prior test,
+ // without actually altering the CFG (and hence IDOM info).
+ void dominated_by( Node *prevdom, Node *iff );
+
+ // Split Node 'n' through merge point
+ Node *split_thru_region( Node *n, Node *region );
+ // Split Node 'n' through merge point if there is enough win.
+ Node *split_thru_phi( Node *n, Node *region, int policy );
+ // Found an If getting its condition-code input from a Phi in the
+ // same block. Split thru the Region.
+ void do_split_if( Node *iff );
+
+private:
+ // Return a type based on condition control flow
+ const TypeInt* filtered_type( Node *n, Node* n_ctrl);
+ const TypeInt* filtered_type( Node *n ) { return filtered_type(n, NULL); }
+ // Helpers for filtered type
+ const TypeInt* filtered_type_from_dominators( Node* val, Node *val_ctrl);
+ const TypeInt* filtered_type_at_if( Node* val, Node *if_proj);
+
+ // Helper functions
+ void register_new_node( Node *n, Node *blk );
+ Node *spinup( Node *iff, Node *new_false, Node *new_true, Node *region, Node *phi, small_cache *cache );
+ Node *find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true );
+ void handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true );
+ bool split_up( Node *n, Node *blk1, Node *blk2 );
+ void sink_use( Node *use, Node *post_loop );
+ Node *place_near_use( Node *useblock ) const;
+
+ bool _created_loop_node;
+public:
+ void set_created_loop_node() { _created_loop_node = true; }
+ bool created_loop_node() { return _created_loop_node; }
+
+#ifndef PRODUCT
+ void dump( ) const;
+ void dump( IdealLoopTree *loop, uint rpo_idx, Node_List &rpo_list ) const;
+ void rpo( Node *start, Node_Stack &stk, VectorSet &visited, Node_List &rpo_list ) const;
+ void verify() const; // Major slow :-)
+ void verify_compare( Node *n, const PhaseIdealLoop *loop_verify, VectorSet &visited ) const;
+ IdealLoopTree *get_loop_idx(Node* n) const {
+ // Dead nodes have no loop, so return the top level loop instead
+ return _nodes[n->_idx] ? (IdealLoopTree*)_nodes[n->_idx] : _ltree_root;
+ }
+ // Print some stats
+ static void print_statistics();
+ static int _loop_invokes; // Count of PhaseIdealLoop invokes
+ static int _loop_work; // Sum of PhaseIdealLoop x _unique
+#endif
+};
+
+inline Node* IdealLoopTree::tail() {
+ // Handle lazy update of _tail field
+ Node *n = _tail;
+ //while( !n->in(0) ) // Skip dead CFG nodes
+ //n = n->in(1);
+ if (n->in(0) == NULL)
+ n = _phase->get_ctrl(n);
+ _tail = n;
+ return n;
+}
+
+
+// Iterate over the loop tree using a preorder, left-to-right traversal.
+//
+// Example that visits all counted loops from within PhaseIdealLoop
+//
+// for (LoopTreeIterator iter(_ltree_root); !iter.done(); iter.next()) {
+// IdealLoopTree* lpt = iter.current();
+// if (!lpt->is_counted()) continue;
+// ...
+// }
+class LoopTreeIterator : public StackObj {
+private:
+ IdealLoopTree* _root;
+ IdealLoopTree* _curnt;
+
+public:
+ LoopTreeIterator(IdealLoopTree* root) : _root(root), _curnt(root) {}
+
+ bool done() { return _curnt == NULL; } // Finished iterating?
+
+ void next(); // Advance to next loop tree
+
+ IdealLoopTree* current() { return _curnt; } // Return current value of iterator.
+};
diff --git a/src/share/vm/opto/loopopts.cpp b/src/share/vm/opto/loopopts.cpp
new file mode 100644
index 000000000..0da6b1eee
--- /dev/null
+++ b/src/share/vm/opto/loopopts.cpp
@@ -0,0 +1,2677 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_loopopts.cpp.incl"
+
+//=============================================================================
+//------------------------------split_thru_phi---------------------------------
+// Split Node 'n' through merge point if there is enough win.
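+//
+// An illustrative sketch (hypothetical names): splitting an AddI through a
+// two-way Region/Phi clones the op onto each incoming path and replaces the
+// original with a Phi of the clones:
+//
+//   before:  x = Phi(R, a, b)        after:  t1 = AddI(a, c)   // path 1
+//            y = AddI(x, c)                  t2 = AddI(b, c)   // path 2
+//                                            y  = Phi(R, t1, t2)
+//
+// Each clone that constant-folds or commons up with an existing node counts
+// as a 'win'; the split is kept only if the wins exceed 'policy'.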
+Node *PhaseIdealLoop::split_thru_phi( Node *n, Node *region, int policy ) {
+ int wins = 0;
+ assert( !n->is_CFG(), "" );
+ assert( region->is_Region(), "" );
+ Node *phi = new (C, region->req()) PhiNode( region, n->bottom_type() );
+ uint old_unique = C->unique();
+ for( uint i = 1; i < region->req(); i++ ) {
+ Node *x;
+ Node* the_clone = NULL;
+ if( region->in(i) == C->top() ) {
+ x = C->top(); // Dead path? Use a dead data op
+ } else {
+ x = n->clone(); // Else clone up the data op
+ the_clone = x; // Remember for possible deletion.
+ // Alter data node to use pre-phi inputs
+ if( n->in(0) == region )
+ x->set_req( 0, region->in(i) );
+ for( uint j = 1; j < n->req(); j++ ) {
+ Node *in = n->in(j);
+ if( in->is_Phi() && in->in(0) == region )
+ x->set_req( j, in->in(i) ); // Use pre-Phi input for the clone
+ }
+ }
+ // Check for a 'win' on some paths
+ const Type *t = x->Value(&_igvn);
+
+ bool singleton = t->singleton();
+
+ // A TOP singleton indicates that there are no possible values incoming
+ // along a particular edge. In most cases, this is OK, and the Phi will
+ // be eliminated later in an Ideal call. However, we can't allow this to
+ // happen if the singleton occurs on loop entry, as the elimination of
+ // the PhiNode may cause the resulting node to migrate back to a previous
+ // loop iteration.
+ if( singleton && t == Type::TOP ) {
+ // is_Loop() == false does not confirm the absence of a loop (e.g., an
+ // irreducible loop may not be indicated by an affirmative is_Loop());
+ // therefore, the only top we can split thru a phi is on a backedge of
+ // a loop.
+ singleton &= region->is_Loop() && (i != LoopNode::EntryControl);
+ }
+
+ if( singleton ) {
+ wins++;
+ x = ((PhaseGVN&)_igvn).makecon(t);
+ } else {
+ // We now call Identity to try to simplify the cloned node.
+ // Note that some Identity methods call phase->type(this).
+ // Make sure that the type array is big enough for
+ // our new node, even though we may throw the node away.
+ // (Note: This tweaking with igvn only works because x is a new node.)
+ _igvn.set_type(x, t);
+ Node *y = x->Identity(&_igvn);
+ if( y != x ) {
+ wins++;
+ x = y;
+ } else {
+ y = _igvn.hash_find(x);
+ if( y ) {
+ wins++;
+ x = y;
+ } else {
+ // Else x is a new node we are keeping
+ // We do not need register_new_node_with_optimizer
+ // because set_type has already been called.
+ _igvn._worklist.push(x);
+ }
+ }
+ }
+ if (x != the_clone && the_clone != NULL)
+ _igvn.remove_dead_node(the_clone);
+ phi->set_req( i, x );
+ }
+ // Too few wins?
+ if( wins <= policy ) {
+ _igvn.remove_dead_node(phi);
+ return NULL;
+ }
+
+ // Record Phi
+ register_new_node( phi, region );
+
+ for( uint i2 = 1; i2 < phi->req(); i2++ ) {
+ Node *x = phi->in(i2);
+ // If we commoned up the cloned 'x' with another existing Node,
+ // the existing Node picks up a new use. We need to make the
+ // existing Node occur higher up so it dominates its uses.
+ Node *old_ctrl;
+ IdealLoopTree *old_loop;
+
+ // The occasional new node
+ if( x->_idx >= old_unique ) { // Found a new, unplaced node?
+ old_ctrl = x->is_Con() ? C->root() : NULL;
+ old_loop = NULL; // Not in any prior loop
+ } else {
+ old_ctrl = x->is_Con() ? C->root() : get_ctrl(x);
+ old_loop = get_loop(old_ctrl); // Get prior loop
+ }
+ // New late point must dominate new use
+ Node *new_ctrl = dom_lca( old_ctrl, region->in(i2) );
+ // Set new location
+ set_ctrl(x, new_ctrl);
+ IdealLoopTree *new_loop = get_loop( new_ctrl );
+ // If changing loop bodies, see if we need to collect into new body
+ if( old_loop != new_loop ) {
+ if( old_loop && !old_loop->_child )
+ old_loop->_body.yank(x);
+ if( !new_loop->_child )
+ new_loop->_body.push(x); // Collect body info
+ }
+ }
+
+ return phi;
+}
+
+//------------------------------dominated_by------------------------------------
+// Replace the dominated test with an obvious true or false. Place it on the
+// IGVN worklist for later cleanup. Move control-dependent data Nodes on the
+// live path up to the dominating control.
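+//
+// Illustrative shape (hypothetical names):
+//   prevdom = IfTrue(If(bol))  dominates  iff = If(bol)
+// The dominated iff gets its condition replaced by the constant 1 (or 0 for
+// the IfFalse case), so IGVN later folds away its dead arm, and nodes that
+// were control-dependent on the surviving projection are re-pinned to prevdom.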
+void PhaseIdealLoop::dominated_by( Node *prevdom, Node *iff ) {
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations && PrintOpto ) tty->print_cr("dominating test");
+#endif
+
+
+ // prevdom is the dominating projection of the dominating test.
+ assert( iff->is_If(), "" );
+ assert( iff->Opcode() == Op_If || iff->Opcode() == Op_CountedLoopEnd, "Check this code when new subtype is added");
+ int pop = prevdom->Opcode();
+ assert( pop == Op_IfFalse || pop == Op_IfTrue, "" );
+ // 'con' is set to true or false to kill the dominated test.
+ Node *con = _igvn.makecon(pop == Op_IfTrue ? TypeInt::ONE : TypeInt::ZERO);
+ set_ctrl(con, C->root()); // Constant gets a new use
+ // Hack the dominated test
+ _igvn.hash_delete(iff);
+ iff->set_req(1, con);
+ _igvn._worklist.push(iff);
+
+ // If I don't have a reachable TRUE and FALSE path following the IfNode then
+ // I can assume this path reaches an infinite loop. In this case it's not
+ // important to optimize the data Nodes - either the whole compilation will
+ // be tossed or this path (and all data Nodes) will go dead.
+ if( iff->outcnt() != 2 ) return;
+
+ // Make control-dependent data Nodes on the live path (path that will remain
+ // once the dominated IF is removed) become control-dependent on the
+ // dominating projection.
+ Node* dp = ((IfNode*)iff)->proj_out(pop == Op_IfTrue);
+ IdealLoopTree *old_loop = get_loop(dp);
+
+ for (DUIterator_Fast imax, i = dp->fast_outs(imax); i < imax; i++) {
+ Node* cd = dp->fast_out(i); // Control-dependent node
+ if( cd->depends_only_on_test() ) {
+ assert( cd->in(0) == dp, "" );
+ _igvn.hash_delete( cd );
+ cd->set_req(0, prevdom);
+ set_early_ctrl( cd );
+ _igvn._worklist.push(cd);
+ IdealLoopTree *new_loop = get_loop(get_ctrl(cd));
+ if( old_loop != new_loop ) {
+ if( !old_loop->_child ) old_loop->_body.yank(cd);
+ if( !new_loop->_child ) new_loop->_body.push(cd);
+ }
+ --i;
+ --imax;
+ }
+ }
+}
+
+//------------------------------has_local_phi_input----------------------------
+// Return n's control block if 'n' has Phi inputs from its local block and
+// no other block-local inputs (all non-local-phi inputs come from earlier
+// blocks); return NULL otherwise.
+Node *PhaseIdealLoop::has_local_phi_input( Node *n ) {
+ Node *n_ctrl = get_ctrl(n);
+ // See if some inputs come from a Phi in this block, or from before
+ // this block.
+ uint i;
+ for( i = 1; i < n->req(); i++ ) {
+ Node *phi = n->in(i);
+ if( phi->is_Phi() && phi->in(0) == n_ctrl )
+ break;
+ }
+ if( i >= n->req() )
+ return NULL; // No Phi inputs; nowhere to clone thru
+
+ // Check for inputs created between 'n' and the Phi input. These
+ // must split as well; they have already been given the chance
+ // (courtesy of a post-order visit) and since they did not we must
+ // recover the 'cost' of splitting them by being very profitable
+ // when splitting 'n'. Since this is unlikely we simply give up.
+ for( i = 1; i < n->req(); i++ ) {
+ Node *m = n->in(i);
+ if( get_ctrl(m) == n_ctrl && !m->is_Phi() ) {
+ // We allow the special case of AddP's with no local inputs.
+ // This allows us to split-up address expressions.
+ if (m->is_AddP() &&
+ get_ctrl(m->in(2)) != n_ctrl &&
+ get_ctrl(m->in(3)) != n_ctrl) {
+ // Move the AddP up to dominating point
+ set_ctrl_and_loop(m, find_non_split_ctrl(idom(n_ctrl)));
+ continue;
+ }
+ return NULL;
+ }
+ }
+
+ return n_ctrl;
+}
+
+//------------------------------remix_address_expressions----------------------
+// Rework addressing expressions to get the most loop-invariant stuff
+// moved out. We'd like to do all associative operators, but it's especially
+// important (common) to do address expressions.
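+//
+// Illustrative sketch ('inv' loop-invariant, 'i' loop-varying, names made up):
+//   (i + inv) << 2   ==>   (i << 2) + (inv << 2)
+// The (inv << 2) half is registered at the invariant input's control and so
+// can be hoisted out of the loop; only (i << 2) remains loop-varying.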
+Node *PhaseIdealLoop::remix_address_expressions( Node *n ) {
+ if (!has_ctrl(n)) return NULL;
+ Node *n_ctrl = get_ctrl(n);
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+
+ // See if 'n' mixes loop-varying and loop-invariant inputs and
+ // itself is loop-varying.
+
+ // Only interested in binary ops (and AddP)
+ if( n->req() < 3 || n->req() > 4 ) return NULL;
+
+ Node *n1_ctrl = get_ctrl(n->in( 1));
+ Node *n2_ctrl = get_ctrl(n->in( 2));
+ Node *n3_ctrl = get_ctrl(n->in(n->req() == 3 ? 2 : 3));
+ IdealLoopTree *n1_loop = get_loop( n1_ctrl );
+ IdealLoopTree *n2_loop = get_loop( n2_ctrl );
+ IdealLoopTree *n3_loop = get_loop( n3_ctrl );
+
+ // Does one of my inputs spin in a tighter loop than self?
+ if( (n_loop->is_member( n1_loop ) && n_loop != n1_loop) ||
+ (n_loop->is_member( n2_loop ) && n_loop != n2_loop) ||
+ (n_loop->is_member( n3_loop ) && n_loop != n3_loop) )
+ return NULL; // Leave well enough alone
+
+ // Is at least one of my inputs loop-invariant?
+ if( n1_loop == n_loop &&
+ n2_loop == n_loop &&
+ n3_loop == n_loop )
+ return NULL; // No loop-invariant inputs
+
+
+ int n_op = n->Opcode();
+
+ // Replace expressions like ((V+I) << 2) with (V<<2 + I<<2).
+ if( n_op == Op_LShiftI ) {
+ // Scale is loop invariant
+ Node *scale = n->in(2);
+ Node *scale_ctrl = get_ctrl(scale);
+ IdealLoopTree *scale_loop = get_loop(scale_ctrl );
+ if( n_loop == scale_loop || !scale_loop->is_member( n_loop ) )
+ return NULL;
+ const TypeInt *scale_t = scale->bottom_type()->isa_int();
+ if( scale_t && scale_t->is_con() && scale_t->get_con() >= 16 )
+ return NULL; // Don't bother with byte/short masking
+ // Add must vary with loop (else shift would be loop-invariant)
+ Node *add = n->in(1);
+ Node *add_ctrl = get_ctrl(add);
+ IdealLoopTree *add_loop = get_loop(add_ctrl);
+ //assert( n_loop == add_loop, "" );
+ if( n_loop != add_loop ) return NULL; // happens w/ evil ZKM loops
+
+ // Convert I-V into I + (0-V); same for V-I
+ if( add->Opcode() == Op_SubI &&
+ _igvn.type( add->in(1) ) != TypeInt::ZERO ) {
+ Node *zero = _igvn.intcon(0);
+ set_ctrl(zero, C->root());
+ Node *neg = new (C, 3) SubINode( _igvn.intcon(0), add->in(2) );
+ register_new_node( neg, get_ctrl(add->in(2) ) );
+ add = new (C, 3) AddINode( add->in(1), neg );
+ register_new_node( add, add_ctrl );
+ }
+ if( add->Opcode() != Op_AddI ) return NULL;
+ // See if one add input is loop invariant
+ Node *add_var = add->in(1);
+ Node *add_var_ctrl = get_ctrl(add_var);
+ IdealLoopTree *add_var_loop = get_loop(add_var_ctrl );
+ Node *add_invar = add->in(2);
+ Node *add_invar_ctrl = get_ctrl(add_invar);
+ IdealLoopTree *add_invar_loop = get_loop(add_invar_ctrl );
+ if( add_var_loop == n_loop ) {
+ } else if( add_invar_loop == n_loop ) {
+ // Swap to find the invariant part
+ add_invar = add_var;
+ add_invar_ctrl = add_var_ctrl;
+ add_invar_loop = add_var_loop;
+ add_var = add->in(2);
+ add_var_ctrl = get_ctrl(add_var); // update the outer locals (no shadowing)
+ add_var_loop = get_loop(add_var_ctrl);
+ } else // Else neither input is loop invariant
+ return NULL;
+ if( n_loop == add_invar_loop || !add_invar_loop->is_member( n_loop ) )
+ return NULL; // No invariant part of the add?
+
+ // Yes! Reshape address expression!
+ Node *inv_scale = new (C, 3) LShiftINode( add_invar, scale );
+ register_new_node( inv_scale, add_invar_ctrl );
+ Node *var_scale = new (C, 3) LShiftINode( add_var, scale );
+ register_new_node( var_scale, n_ctrl );
+ Node *var_add = new (C, 3) AddINode( var_scale, inv_scale );
+ register_new_node( var_add, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, var_add );
+ return var_add;
+ }
+
+ // Replace (I+V) with (V+I)
+ if( n_op == Op_AddI ||
+ n_op == Op_AddL ||
+ n_op == Op_AddF ||
+ n_op == Op_AddD ||
+ n_op == Op_MulI ||
+ n_op == Op_MulL ||
+ n_op == Op_MulF ||
+ n_op == Op_MulD ) {
+ if( n2_loop == n_loop ) {
+ assert( n1_loop != n_loop, "" );
+ n->swap_edges(1, 2);
+ }
+ }
+
+ // Replace ((I1 +p V) +p I2) with ((I1 +p I2) +p V),
+ // but not if I2 is a constant.
+ if( n_op == Op_AddP ) {
+ if( n2_loop == n_loop && n3_loop != n_loop ) {
+ if( n->in(2)->Opcode() == Op_AddP && !n->in(3)->is_Con() ) {
+ Node *n22_ctrl = get_ctrl(n->in(2)->in(2));
+ Node *n23_ctrl = get_ctrl(n->in(2)->in(3));
+ IdealLoopTree *n22loop = get_loop( n22_ctrl );
+ IdealLoopTree *n23_loop = get_loop( n23_ctrl );
+ if( n22loop != n_loop && n22loop->is_member(n_loop) &&
+ n23_loop == n_loop ) {
+ Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2)->in(2), n->in(3) );
+ // Stuff new AddP in the loop preheader
+ register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
+ Node *add2 = new (C, 4) AddPNode( n->in(1), add1, n->in(2)->in(3) );
+ register_new_node( add2, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, add2 );
+ return add2;
+ }
+ }
+ }
+
+ // Replace (I1 +p (I2 + V)) with ((I1 +p I2) +p V)
+ if( n2_loop != n_loop && n3_loop == n_loop ) {
+ if( n->in(3)->Opcode() == Op_AddI ) {
+ Node *V = n->in(3)->in(1);
+ Node *I = n->in(3)->in(2);
+ if( is_member(n_loop,get_ctrl(V)) ) {
+ } else {
+ Node *tmp = V; V = I; I = tmp;
+ }
+ if( !is_member(n_loop,get_ctrl(I)) ) {
+ Node *add1 = new (C, 4) AddPNode( n->in(1), n->in(2), I );
+ // Stuff new AddP in the loop preheader
+ register_new_node( add1, n_loop->_head->in(LoopNode::EntryControl) );
+ Node *add2 = new (C, 4) AddPNode( n->in(1), add1, V );
+ register_new_node( add2, n_ctrl );
+ _igvn.hash_delete( n );
+ _igvn.subsume_node( n, add2 );
+ return add2;
+ }
+ }
+ }
+ }
+
+ return NULL;
+}
+
+//------------------------------conditional_move-------------------------------
+// Attempt to replace a Phi with a conditional move. We have some pretty
+// strict profitability requirements. All Phis at the merge point must
+// be converted, so we can remove the control flow. We need to limit the
+// number of c-moves to a small handful. All code that was in the side-arms
+// of the CFG diamond is now speculatively executed. This code has to be
+// "cheap enough". We are pretty much limited to CFG diamonds that merge
+// 1 or 2 items with a total of 1 or 2 ops executed speculatively.
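+//
+// Illustrative source-level shape (hypothetical):
+//   if (p) x = a; else x = b;   ==>   x = CMove(Bool(p), a, b)
+// Every Phi hanging off the diamond's Region must convert, and any ops that
+// were local to one arm become speculative inputs of the CMove.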
+Node *PhaseIdealLoop::conditional_move( Node *region ) {
+
+ assert( region->is_Region(), "sanity check" );
+ if( region->req() != 3 ) return NULL;
+
+ // Check for CFG diamond
+ Node *lp = region->in(1);
+ Node *rp = region->in(2);
+ if( !lp || !rp ) return NULL;
+ Node *lp_c = lp->in(0);
+ if( lp_c == NULL || lp_c != rp->in(0) || !lp_c->is_If() ) return NULL;
+ IfNode *iff = lp_c->as_If();
+
+ // Check for highly predictable branch. No point in CMOV'ing if
+ // we are going to predict accurately all the time.
+ // %%% This hides patterns produced by utility methods like Math.min.
+ if( iff->_prob < PROB_UNLIKELY_MAG(3) ||
+ iff->_prob > PROB_LIKELY_MAG(3) )
+ return NULL;
+
+ // Check for ops pinned in an arm of the diamond.
+ // Can't remove the control flow in this case
+ if( lp->outcnt() > 1 ) return NULL;
+ if( rp->outcnt() > 1 ) return NULL;
+
+ // Check profitability
+ int cost = 0;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node *out = region->fast_out(i);
+ if( !out->is_Phi() ) continue; // Ignore other control edges, etc
+ PhiNode* phi = out->as_Phi();
+ switch (phi->type()->basic_type()) {
+ case T_LONG:
+ cost++; // Probably encodes as 2 CMOV's
+ case T_INT: // These all CMOV fine
+ case T_FLOAT:
+ case T_DOUBLE:
+ case T_ADDRESS: // (RawPtr)
+ cost++;
+ break;
+ case T_OBJECT: { // Base oops are OK, but not derived oops
+ const TypeOopPtr *tp = phi->type()->isa_oopptr();
+ // Derived pointers are Bad (tm): what's the Base (for GC purposes) of a
+ // CMOVE'd derived pointer? It's a CMOVE'd derived base. Thus
+ // CMOVE'ing a derived pointer requires we also CMOVE the base. If we
+ // have a Phi for the base here that we convert to a CMOVE all is well
+ // and good. But if the base is dead, we'll not make a CMOVE. Later
+ // the allocator will have to produce a base by creating a CMOVE of the
+ // relevant bases. This puts the allocator in the business of
+ // manufacturing expensive instructions, generally a bad plan.
+ // Just Say No to Conditionally-Moved Derived Pointers.
+ if( tp && tp->offset() != 0 )
+ return NULL;
+ cost++;
+ break;
+ }
+ default:
+ return NULL; // In particular, can't do memory or I/O
+ }
+ // Add in cost any speculative ops
+ for( uint j = 1; j < region->req(); j++ ) {
+ Node *proj = region->in(j);
+ Node *inp = phi->in(j);
+ if (get_ctrl(inp) == proj) { // Found local op
+ cost++;
+ // Check for a chain of dependent ops; these will all become
+ // speculative in a CMOV.
+ for( uint k = 1; k < inp->req(); k++ )
+ if (get_ctrl(inp->in(k)) == proj)
+ return NULL; // Too much speculative goo
+ }
+ }
+ // See if the Phi is used by a Cmp. This will likely Split-If, a
+ // higher-payoff operation.
+ for (DUIterator_Fast kmax, k = phi->fast_outs(kmax); k < kmax; k++) {
+ Node* use = phi->fast_out(k);
+ if( use->is_Cmp() )
+ return NULL;
+ }
+ }
+ if( cost >= ConditionalMoveLimit ) return NULL; // Too much goo
+
+ // --------------
+ // Now replace all Phis with CMOV's
+ Node *cmov_ctrl = iff->in(0);
+ uint flip = (lp->Opcode() == Op_IfTrue);
+ while( 1 ) {
+ PhiNode* phi = NULL;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node *out = region->fast_out(i);
+ if (out->is_Phi()) {
+ phi = out->as_Phi();
+ break;
+ }
+ }
+ if (phi == NULL) break;
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) tty->print_cr("CMOV");
+#endif
+ // Move speculative ops
+ for( uint j = 1; j < region->req(); j++ ) {
+ Node *proj = region->in(j);
+ Node *inp = phi->in(j);
+ if (get_ctrl(inp) == proj) { // Found local op
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print(" speculate: ");
+ inp->dump();
+ }
+#endif
+ set_ctrl(inp, cmov_ctrl);
+ }
+ }
+ Node *cmov = CMoveNode::make( C, cmov_ctrl, iff->in(1), phi->in(1+flip), phi->in(2-flip), _igvn.type(phi) );
+ register_new_node( cmov, cmov_ctrl );
+ _igvn.hash_delete(phi);
+ _igvn.subsume_node( phi, cmov );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+ }
+
+ // The useless CFG diamond will fold up later; see the optimization in
+ // RegionNode::Ideal.
+ _igvn._worklist.push(region);
+
+ return iff->in(1);
+}
+
+//------------------------------split_if_with_blocks_pre-----------------------
+// Do the real work in a non-recursive function. Data nodes want to be
+// cloned in the pre-order so they can feed each other nicely.
+Node *PhaseIdealLoop::split_if_with_blocks_pre( Node *n ) {
+ // Cloning these guys is unlikely to win
+ int n_op = n->Opcode();
+ if( n_op == Op_MergeMem ) return n;
+ if( n->is_Proj() ) return n;
+ // Do not clone-up CmpFXXX variations, as these are always
+ // followed by a CmpI
+ if( n->is_Cmp() ) return n;
+ // Attempt to use a conditional move instead of a phi/branch
+ if( ConditionalMoveLimit > 0 && n_op == Op_Region ) {
+ Node *cmov = conditional_move( n );
+ if( cmov ) return cmov;
+ }
+ if( n->is_CFG() || n_op == Op_StorePConditional || n_op == Op_StoreLConditional || n_op == Op_CompareAndSwapI || n_op == Op_CompareAndSwapL ||n_op == Op_CompareAndSwapP) return n;
+ if( n_op == Op_Opaque1 || // Opaque nodes cannot be mod'd
+ n_op == Op_Opaque2 ) {
+ if( !C->major_progress() ) // If chance of no more loop opts...
+ _igvn._worklist.push(n); // maybe we'll remove them
+ return n;
+ }
+
+ if( n->is_Con() ) return n; // No cloning for Con nodes
+
+ Node *n_ctrl = get_ctrl(n);
+ if( !n_ctrl ) return n; // Dead node
+
+ // Attempt to remix address expressions for loop invariants
+ Node *m = remix_address_expressions( n );
+ if( m ) return m;
+
+ // Determine if the Node has inputs from some local Phi.
+ // Returns the block to clone thru.
+ Node *n_blk = has_local_phi_input( n );
+ if( !n_blk ) return n;
+ // Do not clone the trip counter through on a CountedLoop
+ // (messes up the canonical shape).
+ if( n_blk->is_CountedLoop() && n->Opcode() == Op_AddI ) return n;
+
+ // Check for having no control input; not pinned. Allow
+ // dominating control.
+ if( n->in(0) ) {
+ Node *dom = idom(n_blk);
+ if( dom_lca( n->in(0), dom ) != n->in(0) )
+ return n;
+ }
+ // Policy: when is it profitable. You must get more wins than
+ // policy before it is considered profitable. Policy is usually 0,
+ // so 1 win is considered profitable. Big merges will require big
+ // cloning, so get a larger policy.
+ int policy = n_blk->req() >> 2;
+
+ // If the loop is a candidate for range check elimination,
+ // delay splitting through its phi until a later loop optimization
+ if (n_blk->is_CountedLoop()) {
+ IdealLoopTree *lp = get_loop(n_blk);
+ if (lp && lp->_rce_candidate) {
+ return n;
+ }
+ }
+
+ // Use same limit as split_if_with_blocks_post
+ if( C->unique() > 35000 ) return n; // Method too big
+
+ // Split 'n' through the merge point if it is profitable
+ Node *phi = split_thru_phi( n, n_blk, policy );
+ if( !phi ) return n;
+
+ // Found a Phi to split thru!
+ // Replace 'n' with the new phi
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+ // Moved a load around the loop, 'en-registering' something.
+ if( n_blk->Opcode() == Op_Loop && n->is_Load() &&
+ !phi->in(LoopNode::LoopBackControl)->is_Load() )
+ C->set_major_progress();
+
+ return phi;
+}
+
+static bool merge_point_too_heavy(Compile* C, Node* region) {
+ // Bail out if the region and its phis have too many users.
+ int weight = 0;
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ weight += region->fast_out(i)->outcnt();
+ }
+ int nodes_left = MaxNodeLimit - C->unique();
+ if (weight * 8 > nodes_left) {
+#ifndef PRODUCT
+ if (PrintOpto)
+ tty->print_cr("*** Split-if bails out: %d nodes, region weight %d", C->unique(), weight);
+#endif
+ return true;
+ } else {
+ return false;
+ }
+}
+
+#ifdef _LP64
+static bool merge_point_safe(Node* region) {
+ // 4799512: Stop split_if_with_blocks from splitting a block with a ConvI2LNode
+ // having a PhiNode input. This sidesteps the dangerous case where the split
+ // ConvI2LNode may become TOP if the input Value() does not
+ // overlap the ConvI2L range, leaving a node which may not dominate its
+ // uses.
+ // A better fix for this problem can be found in the BugTraq entry, but
+ // expediency for Mantis demands this hack.
+ for (DUIterator_Fast imax, i = region->fast_outs(imax); i < imax; i++) {
+ Node* n = region->fast_out(i);
+ if (n->is_Phi()) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* m = n->fast_out(j);
+ if (m->Opcode() == Op_ConvI2L) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+
+//------------------------------place_near_use---------------------------------
+// Place some computation next to use but not inside inner loops.
+// For inner loop uses move it to the preheader area.
+Node *PhaseIdealLoop::place_near_use( Node *useblock ) const {
+ IdealLoopTree *u_loop = get_loop( useblock );
+ return (u_loop->_irreducible || u_loop->_child)
+ ? useblock
+ : u_loop->_head->in(LoopNode::EntryControl);
+}
+
+
+//------------------------------split_if_with_blocks_post----------------------
+// Do the real work in a non-recursive function. CFG hackery wants to be
+// in the post-order, so it can dirty the I-DOM info and not use the dirtied
+// info.
+void PhaseIdealLoop::split_if_with_blocks_post( Node *n ) {
+
+ // Cloning Cmp through Phi's involves the split-if transform.
+ // FastLock is not used by an If
+ if( n->is_Cmp() && !n->is_FastLock() ) {
+ if( C->unique() > 35000 ) return; // Method too big
+
+ // Do not do 'split-if' if irreducible loops are present.
+ if( _has_irreducible_loops )
+ return;
+
+ Node *n_ctrl = get_ctrl(n);
+ // Determine if the Node has inputs from some local Phi.
+ // Returns the block to clone thru.
+ Node *n_blk = has_local_phi_input( n );
+ if( n_blk != n_ctrl ) return;
+
+ if( merge_point_too_heavy(C, n_ctrl) )
+ return;
+
+ if( n->outcnt() != 1 ) return; // Multiple bool's from 1 compare?
+ Node *bol = n->unique_out();
+ assert( bol->is_Bool(), "expect a bool here" );
+ if( bol->outcnt() != 1 ) return;// Multiple branches from 1 compare?
+ Node *iff = bol->unique_out();
+
+ // Check some safety conditions
+ if( iff->is_If() ) { // Classic split-if?
+ if( iff->in(0) != n_ctrl ) return; // Compare must be in same blk as if
+ } else if (iff->is_CMove()) { // Trying to split-up a CMOVE
+ if( get_ctrl(iff->in(2)) == n_ctrl ||
+ get_ctrl(iff->in(3)) == n_ctrl )
+ return; // Inputs not yet split-up
+ if ( get_loop(n_ctrl) != get_loop(get_ctrl(iff)) ) {
+ return; // Loop-invar test gates loop-varying CMOVE
+ }
+ } else {
+ return; // some other kind of node, such as an Allocate
+ }
+
+ // Do not do 'split-if' if some paths are dead. First do dead code
+ // elimination and then see if its still profitable.
+ for( uint i = 1; i < n_ctrl->req(); i++ )
+ if( n_ctrl->in(i) == C->top() )
+ return;
+
+ // When is split-if profitable? Every 'win' means some control flow
+ // goes dead, so it's almost always a win.
+ int policy = 0;
+ // If trying to do a 'Split-If' at the loop head, it is only
+ // profitable if the cmp folds up on BOTH paths. Otherwise we
+ // risk peeling a loop forever.
+
+ // CNC - Disabled for now. Requires careful handling of loop
+ // body selection for the cloned code. Also, make sure we check
+ // for any input path not being in the same loop as n_ctrl. For
+ // irreducible loops we cannot check for 'n_ctrl->is_Loop()'
+ // because the alternative loop entry points won't be converted
+ // into LoopNodes.
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+ for( uint j = 1; j < n_ctrl->req(); j++ )
+ if( get_loop(n_ctrl->in(j)) != n_loop )
+ return;
+
+#ifdef _LP64
+ // Check for safety of the merge point.
+ if( !merge_point_safe(n_ctrl) ) {
+ return;
+ }
+#endif
+
+ // Split compare 'n' through the merge point if it is profitable
+ Node *phi = split_thru_phi( n, n_ctrl, policy );
+ if( !phi ) return;
+
+ // Found a Phi to split thru!
+ // Replace 'n' with the new phi
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+
+ // Now split the bool up thru the phi
+ Node *bolphi = split_thru_phi( bol, n_ctrl, -1 );
+ _igvn.hash_delete(bol);
+ _igvn.subsume_node( bol, bolphi );
+ assert( iff->in(1) == bolphi, "" );
+ if( bolphi->Value(&_igvn)->singleton() )
+ return;
+
+ // Conditional-move? Must split up now
+ if( !iff->is_If() ) {
+ Node *cmovphi = split_thru_phi( iff, n_ctrl, -1 );
+ _igvn.hash_delete(iff);
+ _igvn.subsume_node( iff, cmovphi );
+ return;
+ }
+
+ // Now split the IF
+ do_split_if( iff );
+ return;
+ }
+
+ // Check for an IF ready to split; one that has its
+ // condition codes input coming from a Phi at the block start.
+ int n_op = n->Opcode();
+
+ // Check for an IF being dominated by another IF same test
+ if( n_op == Op_If ) {
+ Node *bol = n->in(1);
+ uint max = bol->outcnt();
+ // Check for same test used more than once?
+ if( n_op == Op_If && max > 1 && bol->is_Bool() ) {
+ // Search up IDOMs to see if this IF is dominated.
+ Node *cutoff = get_ctrl(bol);
+
+ // Now search up IDOMs till cutoff, looking for a dominating test
+ Node *prevdom = n;
+ Node *dom = idom(prevdom);
+ while( dom != cutoff ) {
+ if( dom->req() > 1 && dom->in(1) == bol && prevdom->in(0) == dom ) {
+ // Replace the dominated test with an obvious true or false.
+ // Place it on the IGVN worklist for later cleanup.
+ C->set_major_progress();
+ dominated_by( prevdom, n );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+ return;
+ }
+ prevdom = dom;
+ dom = idom(prevdom);
+ }
+ }
+ }
+
+ // See if a shared loop-varying computation has no loop-varying uses.
+ // Happens if something is only used for JVM state in uncommon trap exits,
+ // like various versions of induction variable+offset. Clone the
+ // computation per usage to allow it to sink out of the loop.
+ if (has_ctrl(n) && !n->in(0)) {// n not dead and has no control edge (can float about)
+ Node *n_ctrl = get_ctrl(n);
+ IdealLoopTree *n_loop = get_loop(n_ctrl);
+ if( n_loop != _ltree_root ) {
+ DUIterator_Fast imax, i = n->fast_outs(imax);
+ for (; i < imax; i++) {
+ Node* u = n->fast_out(i);
+ if( !has_ctrl(u) ) break; // Found control user
+ IdealLoopTree *u_loop = get_loop(get_ctrl(u));
+ if( u_loop == n_loop ) break; // Found loop-varying use
+ if( n_loop->is_member( u_loop ) ) break; // Found use in inner loop
+ if( u->Opcode() == Op_Opaque1 ) break; // Found loop limit, bugfix for 4677003
+ }
+ bool did_break = (i < imax); // Did we break out of the previous loop?
+ if (!did_break && n->outcnt() > 1) { // All uses in outer loops!
+ Node *late_load_ctrl;
+ if (n->is_Load()) {
+ // If n is a load, get and save the result from get_late_ctrl(),
+ // to be later used in calculating the control for n's clones.
+ clear_dom_lca_tags();
+ late_load_ctrl = get_late_ctrl(n, n_ctrl);
+ }
+ // If n is a load, and the late control is the same as the current
+ // control, then the cloning of n is a pointless exercise, because
+ // GVN will ensure that we end up where we started.
+ if (!n->is_Load() || late_load_ctrl != n_ctrl) {
+ for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; ) {
+ Node *u = n->last_out(j); // Clone private computation per use
+ _igvn.hash_delete(u);
+ _igvn._worklist.push(u);
+ Node *x = n->clone(); // Clone computation
+ Node *x_ctrl = NULL;
+ if( u->is_Phi() ) {
+ // Replace all uses of normal nodes. Replace Phi uses
+ // individually, so the separate Nodes can sink down
+ // different paths.
+ uint k = 1;
+ while( u->in(k) != n ) k++;
+ u->set_req( k, x );
+ // x goes next to Phi input path
+ x_ctrl = u->in(0)->in(k);
+ --j;
+ } else { // Normal use
+ // Replace all uses
+ for( uint k = 0; k < u->req(); k++ ) {
+ if( u->in(k) == n ) {
+ u->set_req( k, x );
+ --j;
+ }
+ }
+ x_ctrl = get_ctrl(u);
+ }
+
+ // Find control for 'x' next to use but not inside inner loops.
+ // For inner loop uses get the preheader area.
+ x_ctrl = place_near_use(x_ctrl);
+
+ if (n->is_Load()) {
+ // For loads, add a control edge to a CFG node outside of the loop
+ // to force them to not combine and return back inside the loop
+ // during GVN optimization (4641526).
+ //
+ // Because we are setting the actual control input, factor in
+ // the result from get_late_ctrl() so we respect any
+ // anti-dependences. (6233005).
+ x_ctrl = dom_lca(late_load_ctrl, x_ctrl);
+
+ // Don't allow the control input to be a CFG splitting node.
+ // Such nodes should only have ProjNodes as outs, e.g. IfNode
+ // should only have IfTrueNode and IfFalseNode (4985384).
+ x_ctrl = find_non_split_ctrl(x_ctrl);
+ assert(dom_depth(n_ctrl) <= dom_depth(x_ctrl), "n is later than its clone");
+
+ x->set_req(0, x_ctrl);
+ }
+ register_new_node(x, x_ctrl);
+
+ // Some institutional knowledge is needed here: 'x' is
+ // yanked because if the optimizer runs GVN on it all the
+ // cloned x's will common up and undo this optimization and
+ // be forced back in the loop. This is annoying because it
+ // makes +VerifyOpto report false-positives on progress. I
+ // tried setting control edges on the x's to force them to
+ // not combine, but the matching gets worried when it tries
+ // to fold a StoreP and an AddP together (as part of an
+ // address expression) and the AddP and StoreP have
+ // different controls.
+ if( !x->is_Load() ) _igvn._worklist.yank(x);
+ }
+ _igvn.remove_dead_node(n);
+ }
+ }
+ }
+ }
+
+ // Check for Opaque2's whose loop has disappeared - whose input is in the
+ // same loop nest as their output. Remove 'em, they are no longer useful.
+ if( n_op == Op_Opaque2 &&
+ n->in(1) != NULL &&
+ get_loop(get_ctrl(n)) == get_loop(get_ctrl(n->in(1))) ) {
+ _igvn.add_users_to_worklist(n);
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, n->in(1) );
+ }
+}
+
+//------------------------------split_if_with_blocks---------------------------
+// Check for aggressive application of 'split-if' optimization,
+// using basic block level info.
+void PhaseIdealLoop::split_if_with_blocks( VectorSet &visited, Node_Stack &nstack ) {
+ Node *n = C->root();
+ visited.set(n->_idx); // first, mark node as visited
+ // Do pre-visit work for root
+ n = split_if_with_blocks_pre( n );
+ uint cnt = n->outcnt();
+ uint i = 0;
+ while (true) {
+ // Visit all children
+ if (i < cnt) {
+ Node* use = n->raw_out(i);
+ ++i;
+ if (use->outcnt() != 0 && !visited.test_set(use->_idx)) {
+ // Now do pre-visit work for this use
+ use = split_if_with_blocks_pre( use );
+ nstack.push(n, i); // Save parent and next use's index.
+ n = use; // Process all children of current use.
+ cnt = use->outcnt();
+ i = 0;
+ }
+ }
+ else {
+ // All of n's children have been processed, complete post-processing.
+ if (cnt != 0 && !n->is_Con()) {
+ assert(has_node(n), "no dead nodes");
+ split_if_with_blocks_post( n );
+ }
+ if (nstack.is_empty()) {
+ // Finished all nodes on stack.
+ break;
+ }
+ // Get saved parent node and next use's index. Visit the rest of uses.
+ n = nstack.node();
+ cnt = n->outcnt();
+ i = nstack.index();
+ nstack.pop();
+ }
+ }
+}
+
+
+//=============================================================================
+//
+// C L O N E A L O O P B O D Y
+//
+
+//------------------------------clone_iff--------------------------------------
+// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+// "Nearly" because all Nodes have been cloned from the original in the loop,
+// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+// through the Phi recursively, and return a Bool.
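+//
+// Illustrative shape (hypothetical inputs): a Phi of Bools such as
+//   Phi(R, Bool(Cmp(a1,b1)), Bool(Cmp(a2,b2)))
+// is rebuilt as a single Bool over a Cmp of merged inputs,
+//   Bool(Cmp(Phi(R,a1,a2), Phi(R,b1,b2)))
+// so the merging moves down onto the Cmp's data inputs.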
+BoolNode *PhaseIdealLoop::clone_iff( PhiNode *phi, IdealLoopTree *loop ) {
+
+ // Convert this Phi into a Phi merging Bools
+ uint i;
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *b = phi->in(i);
+ if( b->is_Phi() ) {
+ _igvn.hash_delete(phi);
+ _igvn._worklist.push(phi);
+ phi->set_req(i, clone_iff( b->as_Phi(), loop ));
+ } else {
+ assert( b->is_Bool(), "" );
+ }
+ }
+
+ Node *sample_bool = phi->in(1);
+ Node *sample_cmp = sample_bool->in(1);
+
+ // Make Phis to merge the Cmp's inputs.
+ int size = phi->in(0)->req();
+ PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *n1 = phi->in(i)->in(1)->in(1);
+ Node *n2 = phi->in(i)->in(1)->in(2);
+ phi1->set_req( i, n1 );
+ phi2->set_req( i, n2 );
+ phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
+ phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
+ }
+ // See if these Phis have been made before.
+ // Register with optimizer
+ Node *hit1 = _igvn.hash_find_insert(phi1);
+ if( hit1 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi1); // Remove new phi
+ assert( hit1->is_Phi(), "" );
+ phi1 = (PhiNode*)hit1; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi1);
+ }
+ Node *hit2 = _igvn.hash_find_insert(phi2);
+ if( hit2 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi2); // Remove new phi
+ assert( hit2->is_Phi(), "" );
+ phi2 = (PhiNode*)hit2; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi2);
+ }
+ // Register Phis with loop/block info
+ set_ctrl(phi1, phi->in(0));
+ set_ctrl(phi2, phi->in(0));
+ // Make a new Cmp
+ Node *cmp = sample_cmp->clone();
+ cmp->set_req( 1, phi1 );
+ cmp->set_req( 2, phi2 );
+ _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, phi->in(0));
+
+ // Make a new Bool
+ Node *b = sample_bool->clone();
+ b->set_req(1,cmp);
+ _igvn.register_new_node_with_optimizer(b);
+ set_ctrl(b, phi->in(0));
+
+ assert( b->is_Bool(), "" );
+ return (BoolNode*)b;
+}
+
+//------------------------------clone_bool-------------------------------------
+// Passed in a Phi merging (recursively) some nearly equivalent Bool/Cmps.
+// "Nearly" because all Nodes have been cloned from the original in the loop,
+// but the fall-in edges to the Cmp are different. Clone bool/Cmp pairs
+// through the Phi recursively, and return a Bool.
+CmpNode *PhaseIdealLoop::clone_bool( PhiNode *phi, IdealLoopTree *loop ) {
+ uint i;
+ // Convert this Phi into a Phi merging Bools
+ for( i = 1; i < phi->req(); i++ ) {
+ Node *b = phi->in(i);
+ if( b->is_Phi() ) {
+ _igvn.hash_delete(phi);
+ _igvn._worklist.push(phi);
+ phi->set_req(i, clone_bool( b->as_Phi(), loop ));
+ } else {
+ assert( b->is_Cmp() || b->is_top(), "inputs are all Cmp or TOP" );
+ }
+ }
+
+ Node *sample_cmp = phi->in(1);
+
+ // Make Phis to merge the Cmp's inputs.
+ int size = phi->in(0)->req();
+ PhiNode *phi1 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ PhiNode *phi2 = new (C, size) PhiNode( phi->in(0), Type::TOP );
+ for( uint j = 1; j < phi->req(); j++ ) {
+ Node *cmp_top = phi->in(j); // Inputs are all Cmp or TOP
+ Node *n1, *n2;
+ if( cmp_top->is_Cmp() ) {
+ n1 = cmp_top->in(1);
+ n2 = cmp_top->in(2);
+ } else {
+ n1 = n2 = cmp_top;
+ }
+ phi1->set_req( j, n1 );
+ phi2->set_req( j, n2 );
+ phi1->set_type( phi1->type()->meet(n1->bottom_type()) );
+ phi2->set_type( phi2->type()->meet(n2->bottom_type()) );
+ }
+
+ // See if these Phis have been made before.
+ // Register with optimizer
+ Node *hit1 = _igvn.hash_find_insert(phi1);
+ if( hit1 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi1); // Remove new phi
+ assert( hit1->is_Phi(), "" );
+ phi1 = (PhiNode*)hit1; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi1);
+ }
+ Node *hit2 = _igvn.hash_find_insert(phi2);
+ if( hit2 ) { // Hit, toss just made Phi
+ _igvn.remove_dead_node(phi2); // Remove new phi
+ assert( hit2->is_Phi(), "" );
+ phi2 = (PhiNode*)hit2; // Use existing phi
+ } else { // Miss
+ _igvn.register_new_node_with_optimizer(phi2);
+ }
+ // Register Phis with loop/block info
+ set_ctrl(phi1, phi->in(0));
+ set_ctrl(phi2, phi->in(0));
+ // Make a new Cmp
+ Node *cmp = sample_cmp->clone();
+ cmp->set_req( 1, phi1 );
+ cmp->set_req( 2, phi2 );
+ _igvn.register_new_node_with_optimizer(cmp);
+ set_ctrl(cmp, phi->in(0));
+
+ assert( cmp->is_Cmp(), "" );
+ return (CmpNode*)cmp;
+}
+
+//------------------------------sink_use---------------------------------------
+// If 'use' was in the loop-exit block, it now needs to be sunk
+// below the post-loop merge point.
+void PhaseIdealLoop::sink_use( Node *use, Node *post_loop ) {
+ if (!use->is_CFG() && get_ctrl(use) == post_loop->in(2)) {
+ set_ctrl(use, post_loop);
+ for (DUIterator j = use->outs(); use->has_out(j); j++)
+ sink_use(use->out(j), post_loop);
+ }
+}
+
+//------------------------------clone_loop-------------------------------------
+//
+// C L O N E A L O O P B O D Y
+//
+// This is the basic building block of the loop optimizations. It clones an
+// entire loop body. It makes an old_new loop body mapping; with this mapping
+// you can find the new-loop equivalent to an old-loop node. All new-loop
+// nodes are exactly equal to their old-loop counterparts, all edges are the
+// same. All exits from the old-loop now have a RegionNode that merges the
+// equivalent new-loop path. This is true even for the normal "loop-exit"
+// condition. All uses of loop-invariant old-loop values now come from (one
+// or more) Phis that merge their new-loop equivalents.
+//
+// This operation leaves the graph in an illegal state: there are two valid
+// control edges coming from the loop pre-header to both loop bodies. I'll
+// definitely have to hack the graph after running this transform.
+//
+// From this building block I will further edit edges to perform loop peeling
+// or loop unrolling or iteration splitting (Range-Check-Elimination), etc.
+//
+// Parameter side_by_side_idom:
+// When side_by_side_idom is NULL, the dominator tree is constructed for
+// the clone loop to dominate the original. Used in construction of
+// pre-main-post loop sequence.
+// When nonnull, the clone and original are side-by-side, both are
+// dominated by the side_by_side_idom node. Used in construction of
+// unswitched loops.
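+//
+// Illustrative use of the mapping (sketch): after
+//   clone_loop(loop, old_new, dd, NULL);
+// the clone of any old-loop node 'n' is old_new[n->_idx]; for example the
+// cloned loop head is old_new[loop->_head->_idx]. Callers then rewire edges
+// between the two bodies to build peeled or pre/main/post loop shapes.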
+void PhaseIdealLoop::clone_loop( IdealLoopTree *loop, Node_List &old_new, int dd,
+ Node* side_by_side_idom) {
+
+ // Step 1: Clone the loop body. Make the old->new mapping.
+ uint i;
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node *old = loop->_body.at(i);
+ Node *nnn = old->clone();
+ old_new.map( old->_idx, nnn );
+ _igvn.register_new_node_with_optimizer(nnn);
+ }
+
+
+ // Step 2: Fix the edges in the new body. If the old input is outside the
+ // loop use it. If the old input is INside the loop, use the corresponding
+ // new node instead.
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node *old = loop->_body.at(i);
+ Node *nnn = old_new[old->_idx];
+ // Fix CFG/Loop controlling the new node
+ if (has_ctrl(old)) {
+ set_ctrl(nnn, old_new[get_ctrl(old)->_idx]);
+ } else {
+ set_loop(nnn, loop->_parent);
+ if (old->outcnt() > 0) {
+ set_idom( nnn, old_new[idom(old)->_idx], dd );
+ }
+ }
+ // Correct edges to the new node
+ for( uint j = 0; j < nnn->req(); j++ ) {
+ Node *n = nnn->in(j);
+ if( n ) {
+ IdealLoopTree *old_in_loop = get_loop( has_ctrl(n) ? get_ctrl(n) : n );
+ if( loop->is_member( old_in_loop ) )
+ nnn->set_req(j, old_new[n->_idx]);
+ }
+ }
+ _igvn.hash_find_insert(nnn);
+ }
+ Node *newhead = old_new[loop->_head->_idx];
+ set_idom(newhead, newhead->in(LoopNode::EntryControl), dd);
+
+
+ // Step 3: Now fix control uses. Loop varying control uses have already
+ // been fixed up (as part of all input edges in Step 2). Loop invariant
+ // control uses must be either an IfFalse or an IfTrue. Make a merge
+ // point to merge the old and new IfFalse/IfTrue nodes; make the use
+ // refer to this.
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_List worklist(area);
+ uint new_counter = C->unique();
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node* old = loop->_body.at(i);
+ if( !old->is_CFG() ) continue;
+ Node* nnn = old_new[old->_idx];
+
+ // Copy uses to a worklist, so I can munge the def-use info
+ // with impunity.
+ for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
+ worklist.push(old->fast_out(j));
+
+ while( worklist.size() ) { // Visit all uses
+ Node *use = worklist.pop();
+ if (!has_node(use)) continue; // Ignore dead nodes
+ IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
+ if( !loop->is_member( use_loop ) && use->is_CFG() ) {
+ // Both OLD and USE are CFG nodes here.
+ assert( use->is_Proj(), "" );
+
+ // Clone the loop exit control projection
+ Node *newuse = use->clone();
+ newuse->set_req(0,nnn);
+ _igvn.register_new_node_with_optimizer(newuse);
+ set_loop(newuse, use_loop);
+ set_idom(newuse, nnn, dom_depth(nnn) + 1 );
+
+ // We need a Region to merge the exit from the peeled body and the
+ // exit from the old loop body.
+ RegionNode *r = new (C, 3) RegionNode(3);
+ // Map the old use to the new merge point
+ old_new.map( use->_idx, r );
+ uint dd_r = MIN2(dom_depth(newuse),dom_depth(use));
+ assert( dd_r >= dom_depth(dom_lca(newuse,use)), "" );
+
+ // The original user of 'use' uses 'r' instead.
+ for (DUIterator_Last lmin, l = use->last_outs(lmin); l >= lmin;) {
+ Node* useuse = use->last_out(l);
+ _igvn.hash_delete(useuse);
+ _igvn._worklist.push(useuse);
+ uint uses_found = 0;
+ if( useuse->in(0) == use ) {
+ useuse->set_req(0, r);
+ uses_found++;
+ if( useuse->is_CFG() ) {
+ assert( dom_depth(useuse) > dd_r, "" );
+ set_idom(useuse, r, dom_depth(useuse));
+ }
+ }
+ for( uint k = 1; k < useuse->req(); k++ ) {
+ if( useuse->in(k) == use ) {
+ useuse->set_req(k, r);
+ uses_found++;
+ }
+ }
+ l -= uses_found; // we deleted 1 or more copies of this edge
+ }
+
+ // Now finish up 'r'
+ r->set_req( 1, newuse );
+ r->set_req( 2, use );
+ _igvn.register_new_node_with_optimizer(r);
+ set_loop(r, use_loop);
+ set_idom(r, !side_by_side_idom ? newuse->in(0) : side_by_side_idom, dd_r);
+ } // End of if a loop-exit test
+ }
+ }
+
+ // Step 4: If loop-invariant use is not control, it must be dominated by a
+ // loop exit IfFalse/IfTrue. Find "proper" loop exit. Make a Region
+ // there if needed. Make a Phi there merging old and new used values.
+ Node_List *split_if_set = NULL;
+ Node_List *split_bool_set = NULL;
+ Node_List *split_cex_set = NULL;
+ for( i = 0; i < loop->_body.size(); i++ ) {
+ Node* old = loop->_body.at(i);
+ Node* nnn = old_new[old->_idx];
+ // Copy uses to a worklist, so I can munge the def-use info
+ // with impunity.
+ for (DUIterator_Fast jmax, j = old->fast_outs(jmax); j < jmax; j++)
+ worklist.push(old->fast_out(j));
+
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ if (!has_node(use)) continue; // Ignore dead nodes
+ if (use->in(0) == C->top()) continue;
+ IdealLoopTree *use_loop = get_loop( has_ctrl(use) ? get_ctrl(use) : use );
+ // Check for data-use outside of loop - at least one of OLD or USE
+ // must not be a CFG node.
+ if( !loop->is_member( use_loop ) && (!old->is_CFG() || !use->is_CFG())) {
+
+ // If the Data use is an IF, that means we have an IF outside of the
+ // loop that is switching on a condition that is set inside of the
+ // loop. Happens if people set a loop-exit flag; then test the flag
+ // in the loop to break the loop, then test is again outside of the
+ // loop to determine which way the loop exited.
+ if( use->is_If() || use->is_CMove() ) {
+ // Since this case is highly unlikely, we lazily build the worklist
+ // of such Nodes to be split.
+ if( !split_if_set )
+ split_if_set = new Node_List(area);
+ split_if_set->push(use);
+ }
+ if( use->is_Bool() ) {
+ if( !split_bool_set )
+ split_bool_set = new Node_List(area);
+ split_bool_set->push(use);
+ }
+ if( use->Opcode() == Op_CreateEx ) {
+ if( !split_cex_set )
+ split_cex_set = new Node_List(area);
+ split_cex_set->push(use);
+ }
+
+
+ // Get "block" use is in
+ uint idx = 0;
+ while( use->in(idx) != old ) idx++;
+ Node *prev = use->is_CFG() ? use : get_ctrl(use);
+ assert( !loop->is_member( get_loop( prev ) ), "" );
+ Node *cfg = prev->_idx >= new_counter
+ ? prev->in(2)
+ : idom(prev);
+ if( use->is_Phi() ) // Phi use is in prior block
+ cfg = prev->in(idx); // NOT in block of Phi itself
+ if (cfg->is_top()) { // Use is dead?
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, C->top());
+ continue;
+ }
+
+ while( !loop->is_member( get_loop( cfg ) ) ) {
+ prev = cfg;
+ cfg = cfg->_idx >= new_counter ? cfg->in(2) : idom(cfg);
+ }
+ // If the use occurs after merging several exits from the loop, then
+ // old value must have dominated all those exits. Since the same old
+ // value was used on all those exits we did not need a Phi at this
+ // merge point. NOW we do need a Phi here. Each loop exit value
+ // is now merged with the peeled body exit; each exit gets its own
+ // private Phi and those Phis need to be merged here.
+ Node *phi;
+ if( prev->is_Region() ) {
+ if( idx == 0 ) { // Updating control edge?
+ phi = prev; // Just use existing control
+ } else { // Else need a new Phi
+ phi = PhiNode::make( prev, old );
+ // Now recursively fix up the new uses of old!
+ for( uint i = 1; i < prev->req(); i++ ) {
+ worklist.push(phi); // Onto worklist once for each 'old' input
+ }
+ }
+ } else {
+ // Get new RegionNode merging old and new loop exits
+ prev = old_new[prev->_idx];
+ assert( prev, "just made this in step 7" );
+ if( idx == 0 ) { // Updating control edge?
+ phi = prev; // Just use existing control
+ } else { // Else need a new Phi
+ // Make a new Phi merging data values properly
+ phi = PhiNode::make( prev, old );
+ phi->set_req( 1, nnn );
+ }
+ }
+ // If inserting a new Phi, check for prior hits
+ if( idx != 0 ) {
+ Node *hit = _igvn.hash_find_insert(phi);
+ if( hit == NULL ) {
+ _igvn.register_new_node_with_optimizer(phi); // Register new phi
+ } else { // or
+ // Remove the new phi from the graph and use the hit
+ _igvn.remove_dead_node(phi);
+ phi = hit; // Use existing phi
+ }
+ set_ctrl(phi, prev);
+ }
+ // Make 'use' use the Phi instead of the old loop body exit value
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, phi);
+ if( use->_idx >= new_counter ) { // If updating new phis
+ // Not needed for correctness, but prevents a weak assert
+ // in AddPNode from tripping (when we end up with different
+ // base & derived Phis that will become the same after
+ // IGVN does CSE).
+ Node *hit = _igvn.hash_find_insert(use);
+ if( hit ) // Go ahead and re-hash for hits.
+ _igvn.subsume_node( use, hit );
+ }
+
+ // If 'use' was in the loop-exit block, it now needs to be sunk
+ // below the post-loop merge point.
+ sink_use( use, prev );
+ }
+ }
+ }
+
+ // Check for IFs that need splitting/cloning. Happens if an IF outside of
+ // the loop uses a condition set in the loop. The original IF probably
+ // takes control from one or more OLD Regions (which in turn get from NEW
+ // Regions). In any case, there will be a set of Phis for each merge point
+// from the IF up to where the original BOOL def exits the loop.
+ if( split_if_set ) {
+ while( split_if_set->size() ) {
+ Node *iff = split_if_set->pop();
+ if( iff->in(1)->is_Phi() ) {
+ BoolNode *b = clone_iff( iff->in(1)->as_Phi(), loop );
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ iff->set_req(1, b);
+ }
+ }
+ }
+ if( split_bool_set ) {
+ while( split_bool_set->size() ) {
+ Node *b = split_bool_set->pop();
+ Node *phi = b->in(1);
+ assert( phi->is_Phi(), "" );
+ CmpNode *cmp = clone_bool( (PhiNode*)phi, loop );
+ _igvn.hash_delete(b);
+ _igvn._worklist.push(b);
+ b->set_req(1, cmp);
+ }
+ }
+ if( split_cex_set ) {
+ while( split_cex_set->size() ) {
+ Node *b = split_cex_set->pop();
+ assert( b->in(0)->is_Region(), "" );
+ assert( b->in(1)->is_Phi(), "" );
+ assert( b->in(0)->in(0) == b->in(1)->in(0), "" );
+ split_up( b, b->in(0), NULL );
+ }
+ }
+
+}
+
+
+//---------------------- stride_of_possible_iv -------------------------------------
+// Looks for an iff/bool/comp with one operand of the compare
+// being a cycle involving an add and a phi,
+// with an optional truncation (left-shift followed by a right-shift)
+// of the add. Returns zero if not an iv.
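+//
+// Illustrative shape (sketch) for an iv advanced by a constant 4:
+//   (If (Bool (CmpU (Phi ... (AddI phi 4)) limit)))
+// returns 4; if no add-around-phi cycle with a constant increment is found,
+// the result is 0.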
+int PhaseIdealLoop::stride_of_possible_iv(Node* iff) {
+ Node* trunc1 = NULL;
+ Node* trunc2 = NULL;
+ const TypeInt* ttype = NULL;
+ if (!iff->is_If() || iff->in(1) == NULL || !iff->in(1)->is_Bool()) {
+ return 0;
+ }
+ BoolNode* bl = iff->in(1)->as_Bool();
+ Node* cmp = bl->in(1);
+ if (!cmp || (cmp->Opcode() != Op_CmpI && cmp->Opcode() != Op_CmpU)) {
+ return 0;
+ }
+ // Must have an invariant operand
+ if (is_member(get_loop(iff), get_ctrl(cmp->in(2)))) {
+ return 0;
+ }
+ Node* add2 = NULL;
+ Node* cmp1 = cmp->in(1);
+ if (cmp1->is_Phi()) {
+ // (If (Bool (CmpX phi:(Phi ...(Optional-trunc(AddI phi add2))) )))
+ Node* phi = cmp1;
+ for (uint i = 1; i < phi->req(); i++) {
+ Node* in = phi->in(i);
+ Node* add = CountedLoopNode::match_incr_with_optional_truncation(in,
+ &trunc1, &trunc2, &ttype);
+ if (add && add->in(1) == phi) {
+ add2 = add->in(2);
+ break;
+ }
+ }
+ } else {
+ // (If (Bool (CmpX addtrunc:(Optional-trunc((AddI (Phi ...addtrunc...) add2)) )))
+ Node* addtrunc = cmp1;
+ Node* add = CountedLoopNode::match_incr_with_optional_truncation(addtrunc,
+ &trunc1, &trunc2, &ttype);
+ if (add && add->in(1)->is_Phi()) {
+ Node* phi = add->in(1);
+ for (uint i = 1; i < phi->req(); i++) {
+ if (phi->in(i) == addtrunc) {
+ add2 = add->in(2);
+ break;
+ }
+ }
+ }
+ }
+ if (add2 != NULL) {
+ const TypeInt* add2t = _igvn.type(add2)->is_int();
+ if (add2t->is_con()) {
+ return add2t->get_con();
+ }
+ }
+ return 0;
+}
+
+
+//---------------------- stay_in_loop -------------------------------------
+// Return the (unique) control output node that's in the loop (if it exists).
+Node* PhaseIdealLoop::stay_in_loop( Node* n, IdealLoopTree *loop) {
+ Node* unique = NULL;
+ if (!n) return NULL;
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i);
+ if (!has_ctrl(use) && loop->is_member(get_loop(use))) {
+ if (unique != NULL) {
+ return NULL;
+ }
+ unique = use;
+ }
+ }
+ return unique;
+}
+
+//------------------------------ register_node -------------------------------------
+// Utility to register node "n" with PhaseIdealLoop
+void PhaseIdealLoop::register_node(Node* n, IdealLoopTree *loop, Node* pred, int ddepth) {
+ _igvn.register_new_node_with_optimizer(n);
+ loop->_body.push(n);
+ if (n->is_CFG()) {
+ set_loop(n, loop);
+ set_idom(n, pred, ddepth);
+ } else {
+ set_ctrl(n, pred);
+ }
+}
+
+//------------------------------ proj_clone -------------------------------------
+// Utility to create an if-projection
+ProjNode* PhaseIdealLoop::proj_clone(ProjNode* p, IfNode* iff) {
+ ProjNode* c = p->clone()->as_Proj();
+ c->set_req(0, iff);
+ return c;
+}
+
+//------------------------------ short_circuit_if -------------------------------------
+// Force the iff control output to be the live_proj
+Node* PhaseIdealLoop::short_circuit_if(IfNode* iff, ProjNode* live_proj) {
+ int proj_con = live_proj->_con;
+ assert(proj_con == 0 || proj_con == 1, "false or true projection");
+ Node *con = _igvn.intcon(proj_con);
+ set_ctrl(con, C->root());
+ if (iff) {
+ iff->set_req(1, con);
+ }
+ return con;
+}
+
+//------------------------------ insert_if_before_proj -------------------------------------
+// Insert a new if before an if projection (* - new node)
+//
+// before
+// if(test)
+// / \
+// v v
+// other-proj proj (arg)
+//
+// after
+// if(test)
+// / \
+// / v
+// | * proj-clone
+// v |
+// other-proj v
+// * new_if(relop(cmp[IU](left,right)))
+// / \
+// v v
+// * new-proj proj
+// (returned)
+//
+ProjNode* PhaseIdealLoop::insert_if_before_proj(Node* left, bool Signed, BoolTest::mask relop, Node* right, ProjNode* proj) {
+ IfNode* iff = proj->in(0)->as_If();
+ IdealLoopTree *loop = get_loop(proj);
+ ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
+ int ddepth = dom_depth(proj);
+
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ _igvn.hash_delete(proj);
+ _igvn._worklist.push(proj);
+
+ proj->set_req(0, NULL); // temporary disconnect
+ ProjNode* proj2 = proj_clone(proj, iff);
+ register_node(proj2, loop, iff, ddepth);
+
+ Node* cmp = Signed ? (Node*) new (C,3)CmpINode(left, right) : (Node*) new (C,3)CmpUNode(left, right);
+ register_node(cmp, loop, proj2, ddepth);
+
+ BoolNode* bol = new (C,2)BoolNode(cmp, relop);
+ register_node(bol, loop, proj2, ddepth);
+
+ IfNode* new_if = new (C,2)IfNode(proj2, bol, iff->_prob, iff->_fcnt);
+ register_node(new_if, loop, proj2, ddepth);
+
+ proj->set_req(0, new_if); // reattach
+ set_idom(proj, new_if, ddepth);
+
+ ProjNode* new_exit = proj_clone(other_proj, new_if)->as_Proj();
+ register_node(new_exit, get_loop(other_proj), new_if, ddepth);
+
+ return new_exit;
+}
+
+//------------------------------ insert_region_before_proj -------------------------------------
+// Insert a region before an if projection (* - new node)
+//
+// before
+// if(test)
+// / |
+// v |
+// proj v
+// other-proj
+//
+// after
+// if(test)
+// / |
+// v |
+// * proj-clone v
+// | other-proj
+// v
+// * new-region
+// |
+// v
+// * dum_if
+// / \
+// v \
+// * dum-proj v
+// proj
+//
+RegionNode* PhaseIdealLoop::insert_region_before_proj(ProjNode* proj) {
+ IfNode* iff = proj->in(0)->as_If();
+ IdealLoopTree *loop = get_loop(proj);
+ ProjNode *other_proj = iff->proj_out(!proj->is_IfTrue())->as_Proj();
+ int ddepth = dom_depth(proj);
+
+ _igvn.hash_delete(iff);
+ _igvn._worklist.push(iff);
+ _igvn.hash_delete(proj);
+ _igvn._worklist.push(proj);
+
+ proj->set_req(0, NULL); // temporary disconnect
+ ProjNode* proj2 = proj_clone(proj, iff);
+ register_node(proj2, loop, iff, ddepth);
+
+ RegionNode* reg = new (C,2)RegionNode(2);
+ reg->set_req(1, proj2);
+ register_node(reg, loop, iff, ddepth);
+
+ IfNode* dum_if = new (C,2)IfNode(reg, short_circuit_if(NULL, proj), iff->_prob, iff->_fcnt);
+ register_node(dum_if, loop, reg, ddepth);
+
+ proj->set_req(0, dum_if); // reattach
+ set_idom(proj, dum_if, ddepth);
+
+ ProjNode* dum_proj = proj_clone(other_proj, dum_if);
+ register_node(dum_proj, loop, dum_if, ddepth);
+
+ return reg;
+}
+
+//------------------------------ insert_cmpi_loop_exit -------------------------------------
+// Clone a signed compare loop exit from an unsigned compare and
+// insert it before the unsigned cmp on the stay-in-loop path.
+// All new nodes are inserted in the dominator tree between the original
+// if and its projections. The original if test is replaced with
+// a constant to force the stay-in-loop path.
+//
+// This is done to make sure that the original if and its projections
+// still dominate the same set of control nodes, that the ctrl() relation
+// from data nodes to them is preserved, and that their loop nesting is
+// preserved.
+//
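+// For example, with a positive stride the cloned unsigned exit "i <u limit"
+// gets a new dominating signed test "i < limit"; with a negative stride the
+// new signed test is "i >= 0" (the zero limit is created below).
+//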
+// before
+// if(i <u limit) unsigned compare loop exit
+// / |
+// v v
+// exit-proj stay-in-loop-proj
+//
+// after
+// if(stay-in-loop-const) original if
+// / |
+// / v
+// / if(i < limit) new signed test
+// / / |
+// / / v
+// / / if(i <u limit) new cloned unsigned test
+// / / / |
+// v v v |
+// region |
+// | |
+// dum-if |
+// / | |
+// ether | |
+// v v
+// exit-proj stay-in-loop-proj
+//
+IfNode* PhaseIdealLoop::insert_cmpi_loop_exit(IfNode* if_cmpu, IdealLoopTree *loop) {
+ const bool Signed = true;
+ const bool Unsigned = false;
+
+ BoolNode* bol = if_cmpu->in(1)->as_Bool();
+ if (bol->_test._test != BoolTest::lt) return NULL;
+ CmpNode* cmpu = bol->in(1)->as_Cmp();
+ if (cmpu->Opcode() != Op_CmpU) return NULL;
+ int stride = stride_of_possible_iv(if_cmpu);
+ if (stride == 0) return NULL;
+
+ ProjNode* lp_continue = stay_in_loop(if_cmpu, loop)->as_Proj();
+ ProjNode* lp_exit = if_cmpu->proj_out(!lp_continue->is_IfTrue())->as_Proj();
+
+ Node* limit = NULL;
+ if (stride > 0) {
+ limit = cmpu->in(2);
+ } else {
+ limit = _igvn.makecon(TypeInt::ZERO);
+ set_ctrl(limit, C->root());
+ }
+ // Create a new region on the exit path
+ RegionNode* reg = insert_region_before_proj(lp_exit);
+
+ // Clone the if-cmpu-true-false using a signed compare
+ BoolTest::mask rel_i = stride > 0 ? bol->_test._test : BoolTest::ge;
+ ProjNode* cmpi_exit = insert_if_before_proj(cmpu->in(1), Signed, rel_i, limit, lp_continue);
+ reg->add_req(cmpi_exit);
+
+ // Clone the if-cmpu-true-false
+ BoolTest::mask rel_u = bol->_test._test;
+ ProjNode* cmpu_exit = insert_if_before_proj(cmpu->in(1), Unsigned, rel_u, cmpu->in(2), lp_continue);
+ reg->add_req(cmpu_exit);
+
+ // Force original if to stay in loop.
+ short_circuit_if(if_cmpu, lp_continue);
+
+ return cmpi_exit->in(0)->as_If();
+}
+
+//------------------------------ remove_cmpi_loop_exit -------------------------------------
+// Remove a previously inserted signed compare loop exit.
+void PhaseIdealLoop::remove_cmpi_loop_exit(IfNode* if_cmp, IdealLoopTree *loop) {
+ Node* lp_proj = stay_in_loop(if_cmp, loop);
+ assert(if_cmp->in(1)->in(1)->Opcode() == Op_CmpI &&
+ stay_in_loop(lp_proj, loop)->is_If() &&
+ stay_in_loop(lp_proj, loop)->in(1)->in(1)->Opcode() == Op_CmpU, "inserted cmpi before cmpu");
+ Node *con = _igvn.makecon(lp_proj->is_IfTrue() ? TypeInt::ONE : TypeInt::ZERO);
+ set_ctrl(con, C->root());
+ if_cmp->set_req(1, con);
+}
+
+//------------------------------ scheduled_nodelist -------------------------------------
+// Create a post order schedule of nodes that are in the
+// "member" set. The list is returned in "sched".
+// The first node in "sched" is the loop head, followed by
+// nodes which have no inputs in the "member" set, and then by
+// the nodes that have an immediate input dependence on a node
+// in "sched".
+void PhaseIdealLoop::scheduled_nodelist( IdealLoopTree *loop, VectorSet& member, Node_List &sched ) {
+
+ assert(member.test(loop->_head->_idx), "loop head must be in member set");
+ Arena *a = Thread::current()->resource_area();
+ VectorSet visited(a);
+ Node_Stack nstack(a, loop->_body.size());
+
+ Node* n = loop->_head; // top of stack is cached in "n"
+ uint idx = 0;
+ visited.set(n->_idx);
+
+ // Initially push all with no inputs from within member set
+ for(uint i = 0; i < loop->_body.size(); i++ ) {
+ Node *elt = loop->_body.at(i);
+ if (member.test(elt->_idx)) {
+ bool found = false;
+ for (uint j = 0; j < elt->req(); j++) {
+ Node* def = elt->in(j);
+ if (def && member.test(def->_idx) && def != elt) {
+ found = true;
+ break;
+ }
+ }
+ if (!found && elt != loop->_head) {
+ nstack.push(n, idx);
+ n = elt;
+ assert(!visited.test(n->_idx), "not seen yet");
+ visited.set(n->_idx);
+ }
+ }
+ }
+
+ // traverse out's that are in the member set
+ while (true) {
+ if (idx < n->outcnt()) {
+ Node* use = n->raw_out(idx);
+ idx++;
+ if (!visited.test_set(use->_idx)) {
+ if (member.test(use->_idx)) {
+ nstack.push(n, idx);
+ n = use;
+ idx = 0;
+ }
+ }
+ } else {
+ // All outputs processed
+ sched.push(n);
+ if (nstack.is_empty()) break;
+ n = nstack.node();
+ idx = nstack.index();
+ nstack.pop();
+ }
+ }
+}
+
+
+//------------------------------ has_use_in_set -------------------------------------
+// Has a use in the vector set
+bool PhaseIdealLoop::has_use_in_set( Node* n, VectorSet& vset ) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (vset.test(use->_idx)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------ has_use_internal_to_set -------------------------------------
+// Has a use internal to the vector set (i.e. not in a phi at the loop head)
+bool PhaseIdealLoop::has_use_internal_to_set( Node* n, VectorSet& vset, IdealLoopTree *loop ) {
+ Node* head = loop->_head;
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (vset.test(use->_idx) && !(use->is_Phi() && use->in(0) == head)) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------ clone_for_use_outside_loop -------------------------------------
+// Clone "n" for uses that are outside of the loop
+void PhaseIdealLoop::clone_for_use_outside_loop( IdealLoopTree *loop, Node* n, Node_List& worklist ) {
+
+ assert(worklist.size() == 0, "should be empty");
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if( !loop->is_member(get_loop(has_ctrl(use) ? get_ctrl(use) : use)) ) {
+ worklist.push(use);
+ }
+ }
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ if (!has_node(use) || use->in(0) == C->top()) continue;
+ uint j;
+ for (j = 0; j < use->req(); j++) {
+ if (use->in(j) == n) break;
+ }
+ assert(j < use->req(), "must be there");
+
+ // clone "n" and insert it between the inputs of "n" and the use outside the loop
+ Node* n_clone = n->clone();
+ _igvn.hash_delete(use);
+ use->set_req(j, n_clone);
+ _igvn._worklist.push(use);
+ if (!use->is_Phi()) {
+ Node* use_c = has_ctrl(use) ? get_ctrl(use) : use->in(0);
+ set_ctrl(n_clone, use_c);
+ assert(!loop->is_member(get_loop(use_c)), "should be outside loop");
+ get_loop(use_c)->_body.push(n_clone);
+ } else {
+ // Use in a phi is considered a use in the associated predecessor block
+ Node *prevbb = use->in(0)->in(j);
+ set_ctrl(n_clone, prevbb);
+ assert(!loop->is_member(get_loop(prevbb)), "should be outside loop");
+ get_loop(prevbb)->_body.push(n_clone);
+ }
+ _igvn.register_new_node_with_optimizer(n_clone);
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("loop exit cloning old: %d new: %d newbb: %d", n->_idx, n_clone->_idx, get_ctrl(n_clone)->_idx);
+ }
+#endif
+ }
+}
+
+
+//------------------------------ clone_for_special_use_inside_loop -------------------------------------
+// Clone "n" for special uses that are in the not_peeled region.
+// If these def-uses occur in separate blocks, the code generator
+// marks the method as not compilable. For example, if a "BoolNode"
+// is in a different basic block than the "IfNode" that uses it, then
+// the compilation is aborted in the code generator.
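+// For example, if a Bool in the peel region feeds an If in the not_peeled
+// region, the Bool is cloned, the clone is added to the not_peel set, and
+// the If is rewired to the clone so the test stays with its user on the
+// not-peeled side.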
+void PhaseIdealLoop::clone_for_special_use_inside_loop( IdealLoopTree *loop, Node* n,
+ VectorSet& not_peel, Node_List& sink_list, Node_List& worklist ) {
+ if (n->is_Phi() || n->is_Load()) {
+ return;
+ }
+ assert(worklist.size() == 0, "should be empty");
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if ( not_peel.test(use->_idx) &&
+ (use->is_If() || use->is_CMove() || use->is_Bool()) &&
+ use->in(1) == n) {
+ worklist.push(use);
+ }
+ }
+ if (worklist.size() > 0) {
+ // clone "n" and insert it between inputs of "n" and the use
+ Node* n_clone = n->clone();
+ loop->_body.push(n_clone);
+ _igvn.register_new_node_with_optimizer(n_clone);
+ set_ctrl(n_clone, get_ctrl(n));
+ sink_list.push(n_clone);
+ not_peel <<= n_clone->_idx; // add n_clone to not_peel set.
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("special not_peeled cloning old: %d new: %d", n->_idx, n_clone->_idx);
+ }
+#endif
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ for (uint j = 1; j < use->req(); j++) {
+ if (use->in(j) == n) {
+ use->set_req(j, n_clone);
+ }
+ }
+ }
+ }
+}
+
+
+//------------------------------ insert_phi_for_loop -------------------------------------
+// Insert phi(lp_entry_val, back_edge_val) at use->in(idx) for loop lp if phi does not already exist
+void PhaseIdealLoop::insert_phi_for_loop( Node* use, uint idx, Node* lp_entry_val, Node* back_edge_val, LoopNode* lp ) {
+ Node *phi = PhiNode::make(lp, back_edge_val);
+ phi->set_req(LoopNode::EntryControl, lp_entry_val);
+ // Use existing phi if it already exists
+ Node *hit = _igvn.hash_find_insert(phi);
+ if( hit == NULL ) {
+ _igvn.register_new_node_with_optimizer(phi);
+ set_ctrl(phi, lp);
+ } else {
+ // Remove the new phi from the graph and use the hit
+ _igvn.remove_dead_node(phi);
+ phi = hit;
+ }
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ use->set_req(idx, phi);
+}
+
+#ifdef ASSERT
+//------------------------------ is_valid_loop_partition -------------------------------------
+// Validate the loop partition sets: peel and not_peel
+bool PhaseIdealLoop::is_valid_loop_partition( IdealLoopTree *loop, VectorSet& peel, Node_List& peel_list,
+ VectorSet& not_peel ) {
+ uint i;
+ // Check that peel_list entries are in the peel set
+ for (i = 0; i < peel_list.size(); i++) {
+ if (!peel.test(peel_list.at(i)->_idx)) {
+ return false;
+ }
+ }
+  // Check that all loop members are in exactly one of the peel set or the not_peel set
+ for (i = 0; i < loop->_body.size(); i++ ) {
+ Node *def = loop->_body.at(i);
+ uint di = def->_idx;
+ // Check that peel set elements are in peel_list
+ if (peel.test(di)) {
+ if (not_peel.test(di)) {
+ return false;
+ }
+ // Must be in peel_list also
+ bool found = false;
+ for (uint j = 0; j < peel_list.size(); j++) {
+ if (peel_list.at(j)->_idx == di) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ return false;
+ }
+ } else if (not_peel.test(di)) {
+ if (peel.test(di)) {
+ return false;
+ }
+ } else {
+ return false;
+ }
+ }
+ return true;
+}
+
+//------------------------------ is_valid_clone_loop_exit_use -------------------------------------
+// Ensure a use outside of the loop is of the right form
+bool PhaseIdealLoop::is_valid_clone_loop_exit_use( IdealLoopTree *loop, Node* use, uint exit_idx) {
+ Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
+ return (use->is_Phi() &&
+ use_c->is_Region() && use_c->req() == 3 &&
+ (use_c->in(exit_idx)->Opcode() == Op_IfTrue ||
+ use_c->in(exit_idx)->Opcode() == Op_IfFalse ||
+ use_c->in(exit_idx)->Opcode() == Op_JumpProj) &&
+ loop->is_member( get_loop( use_c->in(exit_idx)->in(0) ) ) );
+}
+
+//------------------------------ is_valid_clone_loop_form -------------------------------------
+// Ensure that all uses outside of loop are of the right form
+bool PhaseIdealLoop::is_valid_clone_loop_form( IdealLoopTree *loop, Node_List& peel_list,
+ uint orig_exit_idx, uint clone_exit_idx) {
+ uint len = peel_list.size();
+ for (uint i = 0; i < len; i++) {
+ Node *def = peel_list.at(i);
+
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node *use = def->fast_out(j);
+ Node *use_c = has_ctrl(use) ? get_ctrl(use) : use;
+ if (!loop->is_member(get_loop(use_c))) {
+ // use is not in the loop, check for correct structure
+ if (use->in(0) == def) {
+ // Okay
+ } else if (!is_valid_clone_loop_exit_use(loop, use, orig_exit_idx)) {
+ return false;
+ }
+ }
+ }
+ }
+ return true;
+}
+#endif
+
+//------------------------------ partial_peel -------------------------------------
+// Partially peel (aka loop rotation) the top portion of a loop (called
+// the peel section below) by cloning it and placing one copy just before
+// the new loop head and the other copy at the bottom of the new loop.
+//
+// before after where it came from
+//
+// stmt1 stmt1
+// loop: stmt2 clone
+// stmt2 if condA goto exitA clone
+// if condA goto exitA new_loop: new
+// stmt3 stmt3 clone
+// if !condB goto loop if condB goto exitB clone
+// exitB: stmt2 orig
+// stmt4 if !condA goto new_loop orig
+// exitA: goto exitA
+// exitB:
+// stmt4
+// exitA:
+//
+// Step 1: find the cut point: an exit test on probable
+// induction variable.
+// Step 2: schedule (with cloning) operations in the peel
+// section that can be executed after the cut into
+// the section that is not peeled. This may need
+// to clone operations into exit blocks. For
+// instance, a reference to A[i] in the not-peel
+// section and a reference to B[i] in an exit block
+// may cause a left-shift of i by 2 to be placed
+// in the peel block. This step will clone the left
+// shift into the exit block and sink the left shift
+// from the peel to the not-peel section.
+// Step 3: clone the loop, retarget the control, and insert
+// phis for values that are live across the new loop
+// head. This is very dependent on the graph structure
+// from clone_loop. It creates region nodes for
+//               exit control and associated phi nodes for values that
+//               flow out of the loop through that exit. The region
+//               node is dominated by the clone's control projection.
+//               So the clone's peel section is placed before the
+//               new loop head, and the clone's not-peel section
+//               forms the top part of the new loop. The original
+// peel section forms the tail of the new loop.
+// Step 4: update the dominator tree and recompute the
+// dominator depth.
+//
+// orig
+//
+// stmt1
+// |
+// v
+// loop<----+
+// | |
+// stmt2 |
+// | |
+// v |
+// ifA |
+// / | |
+// v v |
+// false true ^ <-- last_peel
+// / | |
+// / ===|==cut |
+// / stmt3 | <-- first_not_peel
+// / | |
+// | v |
+// v ifB |
+// exitA: / \ |
+// / \ |
+// v v |
+// false true |
+// / \ |
+// / ----+
+// |
+// v
+// exitB:
+// stmt4
+//
+//
+// after clone loop
+//
+// stmt1
+// / \
+// clone / \ orig
+// / \
+// / \
+// v v
+// +---->loop loop<----+
+// | | | |
+// | stmt2 stmt2 |
+// | | | |
+// | v v |
+// | ifA ifA |
+// | | \ / | |
+// | v v v v |
+// ^ true false false true ^ <-- last_peel
+// | | ^ \ / | |
+// | cut==|== \ \ / ===|==cut |
+// | stmt3 \ \ / stmt3 | <-- first_not_peel
+// | | dom | | | |
+// | v \ 1v v2 v |
+// | ifB regionA ifB |
+// | / \ | / \ |
+// | / \ v / \ |
+// | v v exitA: v v |
+// | true false false true |
+// | / ^ \ / \ |
+// +---- \ \ / ----+
+// dom \ /
+// \ 1v v2
+// regionB
+// |
+// v
+// exitB:
+// stmt4
+//
+//
+// after partial peel
+//
+// stmt1
+// /
+// clone / orig
+// / TOP
+// / \
+// v v
+// TOP->region region----+
+// | | |
+// stmt2 stmt2 |
+// | | |
+// v v |
+// ifA ifA |
+// | \ / | |
+// v v v v |
+// true false false true | <-- last_peel
+// | ^ \ / +------|---+
+// +->newloop \ \ / === ==cut | |
+// | stmt3 \ \ / TOP | |
+// | | dom | | stmt3 | | <-- first_not_peel
+// | v \ 1v v2 v | |
+// | ifB regionA ifB ^ v
+// | / \ | / \ | |
+// | / \ v / \ | |
+// | v v exitA: v v | |
+// | true false false true | |
+// | / ^ \ / \ | |
+// | | \ \ / v | |
+// | | dom \ / TOP | |
+// | | \ 1v v2 | |
+// ^ v regionB | |
+// | | | | |
+// | | v ^ v
+// | | exitB: | |
+// | | stmt4 | |
+// | +------------>-----------------+ |
+// | |
+// +-----------------<---------------------+
+//
+//
+// final graph
+//
+// stmt1
+// |
+// v
+// ........> ifA clone
+// : / |
+// dom / |
+// : v v
+// : false true
+// : | |
+// : | stmt2 clone
+// : | |
+// : | v
+// : | newloop<-----+
+// : | | |
+// : | stmt3 clone |
+// : | | |
+// : | v |
+// : | ifB |
+// : | / \ |
+// : | v v |
+// : | false true |
+// : | | | |
+// : | v stmt2 |
+// : | exitB: | |
+// : | stmt4 v |
+// : | ifA orig |
+// : | / \ |
+// : | / \ |
+// : | v v |
+// : | false true |
+// : | / \ |
+// : v v -----+
+// RegionA
+// |
+// v
+// exitA
+//
+bool PhaseIdealLoop::partial_peel( IdealLoopTree *loop, Node_List &old_new ) {
+
+ LoopNode *head = loop->_head->as_Loop();
+
+ if (head->is_partial_peel_loop() || head->partial_peel_has_failed()) {
+ return false;
+ }
+
+ // Check for complex exit control
+ for(uint ii = 0; ii < loop->_body.size(); ii++ ) {
+ Node *n = loop->_body.at(ii);
+ int opc = n->Opcode();
+ if (n->is_Call() ||
+ opc == Op_Catch ||
+ opc == Op_CatchProj ||
+ opc == Op_Jump ||
+ opc == Op_JumpProj) {
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nExit control too complex: lp: %d", head->_idx);
+ }
+#endif
+ return false;
+ }
+ }
+
+ int dd = dom_depth(head);
+
+ // Step 1: find cut point
+
+ // Walk up dominators to loop head looking for first loop exit
+  // which is executed on every path thru the loop.
+ IfNode *peel_if = NULL;
+ IfNode *peel_if_cmpu = NULL;
+
+ Node *iff = loop->tail();
+ while( iff != head ) {
+ if( iff->is_If() ) {
+ Node *ctrl = get_ctrl(iff->in(1));
+ if (ctrl->is_top()) return false; // Dead test on live IF.
+ // If loop-varying exit-test, check for induction variable
+ if( loop->is_member(get_loop(ctrl)) &&
+ loop->is_loop_exit(iff) &&
+ is_possible_iv_test(iff)) {
+ Node* cmp = iff->in(1)->in(1);
+ if (cmp->Opcode() == Op_CmpI) {
+ peel_if = iff->as_If();
+ } else {
+ assert(cmp->Opcode() == Op_CmpU, "must be CmpI or CmpU");
+ peel_if_cmpu = iff->as_If();
+ }
+ }
+ }
+ iff = idom(iff);
+ }
+ // Prefer signed compare over unsigned compare.
+ IfNode* new_peel_if = NULL;
+ if (peel_if == NULL) {
+ if (!PartialPeelAtUnsignedTests || peel_if_cmpu == NULL) {
+ return false; // No peel point found
+ }
+ new_peel_if = insert_cmpi_loop_exit(peel_if_cmpu, loop);
+ if (new_peel_if == NULL) {
+ return false; // No peel point found
+ }
+ peel_if = new_peel_if;
+ }
+ Node* last_peel = stay_in_loop(peel_if, loop);
+ Node* first_not_peeled = stay_in_loop(last_peel, loop);
+ if (first_not_peeled == NULL || first_not_peeled == head) {
+ return false;
+ }
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("before partial peel one iteration");
+ Node_List wl;
+ Node* t = head->in(2);
+ while (true) {
+ wl.push(t);
+ if (t == head) break;
+ t = idom(t);
+ }
+ while (wl.size() > 0) {
+ Node* tt = wl.pop();
+ tt->dump();
+ if (tt == last_peel) tty->print_cr("-- cut --");
+ }
+ }
+#endif
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet peel(area);
+ VectorSet not_peel(area);
+ Node_List peel_list(area);
+ Node_List worklist(area);
+ Node_List sink_list(area);
+
+  // The set of cfg nodes to peel is those that are executable from
+ // the head through last_peel.
+ assert(worklist.size() == 0, "should be empty");
+ worklist.push(head);
+ peel.set(head->_idx);
+ while (worklist.size() > 0) {
+ Node *n = worklist.pop();
+ if (n != last_peel) {
+ for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++) {
+ Node* use = n->fast_out(j);
+ if (use->is_CFG() &&
+ loop->is_member(get_loop(use)) &&
+ !peel.test_set(use->_idx)) {
+ worklist.push(use);
+ }
+ }
+ }
+ }
+
+  // The set of non-cfg nodes to peel is those that are control
+  // dependent on the peeled cfg nodes.
+ uint i;
+ for(i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ Node *n_c = has_ctrl(n) ? get_ctrl(n) : n;
+ if (peel.test(n_c->_idx)) {
+ peel.set(n->_idx);
+ } else {
+ not_peel.set(n->_idx);
+ }
+ }
+
+ // Step 2: move operations from the peeled section down into the
+ // not-peeled section
+
+  // Get a post order schedule of nodes in the peel region.
+  // The result is returned in the last argument (peel_list).
+ scheduled_nodelist(loop, peel, peel_list );
+
+ assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
+
+ // For future check for too many new phis
+ uint old_phi_cnt = 0;
+ for (DUIterator_Fast jmax, j = head->fast_outs(jmax); j < jmax; j++) {
+ Node* use = head->fast_out(j);
+ if (use->is_Phi()) old_phi_cnt++;
+ }
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\npeeled list");
+ }
+#endif
+
+ // Evacuate nodes in peel region into the not_peeled region if possible
+ uint new_phi_cnt = 0;
+ for (i = 0; i < peel_list.size();) {
+ Node* n = peel_list.at(i);
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) n->dump();
+#endif
+ bool incr = true;
+ if ( !n->is_CFG() ) {
+
+ if ( has_use_in_set(n, not_peel) ) {
+
+ // If not used internal to the peeled region,
+ // move "n" from peeled to not_peeled region.
+
+ if ( !has_use_internal_to_set(n, peel, loop) ) {
+
+          // if not pinned and not a load (which may be anti-dependent on a store)
+ // and not a CMove (Matcher expects only bool->cmove).
+ if ( n->in(0) == NULL && !n->is_Load() && !n->is_CMove() ) {
+ clone_for_use_outside_loop( loop, n, worklist );
+
+ sink_list.push(n);
+ peel >>= n->_idx; // delete n from peel set.
+ not_peel <<= n->_idx; // add n to not_peel set.
+ peel_list.remove(i);
+ incr = false;
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("sink to not_peeled region: %d newbb: %d",
+ n->_idx, get_ctrl(n)->_idx);
+ }
+#endif
+ }
+ } else {
+ // Otherwise check for special def-use cases that span
+ // the peel/not_peel boundary such as bool->if
+ clone_for_special_use_inside_loop( loop, n, not_peel, sink_list, worklist );
+ new_phi_cnt++;
+ }
+ }
+ }
+ if (incr) i++;
+ }
+
+ if (new_phi_cnt > old_phi_cnt + PartialPeelNewPhiDelta) {
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nToo many new phis: %d old %d new cmpi: %c",
+ new_phi_cnt, old_phi_cnt, new_peel_if != NULL?'T':'F');
+ }
+#endif
+ if (new_peel_if != NULL) {
+ remove_cmpi_loop_exit(new_peel_if, loop);
+ }
+ // Inhibit more partial peeling on this loop
+ assert(!head->is_partial_peel_loop(), "not partial peeled");
+ head->mark_partial_peel_failed();
+ return false;
+ }
+
+ // Step 3: clone loop, retarget control, and insert new phis
+
+ // Create new loop head for new phis and to hang
+  // the nodes being moved (sunk) from the peel region.
+ LoopNode* new_head = new (C, 3) LoopNode(last_peel, last_peel);
+ _igvn.register_new_node_with_optimizer(new_head);
+ assert(first_not_peeled->in(0) == last_peel, "last_peel <- first_not_peeled");
+ first_not_peeled->set_req(0, new_head);
+ set_loop(new_head, loop);
+ loop->_body.push(new_head);
+ not_peel.set(new_head->_idx);
+ set_idom(new_head, last_peel, dom_depth(first_not_peeled));
+ set_idom(first_not_peeled, new_head, dom_depth(first_not_peeled));
+
+ while (sink_list.size() > 0) {
+ Node* n = sink_list.pop();
+ set_ctrl(n, new_head);
+ }
+
+ assert(is_valid_loop_partition(loop, peel, peel_list, not_peel), "bad partition");
+
+ clone_loop( loop, old_new, dd );
+
+ const uint clone_exit_idx = 1;
+ const uint orig_exit_idx = 2;
+ assert(is_valid_clone_loop_form( loop, peel_list, orig_exit_idx, clone_exit_idx ), "bad clone loop");
+
+ Node* head_clone = old_new[head->_idx];
+ LoopNode* new_head_clone = old_new[new_head->_idx]->as_Loop();
+ Node* orig_tail_clone = head_clone->in(2);
+
+ // Add phi if "def" node is in peel set and "use" is not
+
+ for(i = 0; i < peel_list.size(); i++ ) {
+ Node *def = peel_list.at(i);
+ if (!def->is_CFG()) {
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node *use = def->fast_out(j);
+ if (has_node(use) && use->in(0) != C->top() &&
+ (!peel.test(use->_idx) ||
+ (use->is_Phi() && use->in(0) == head)) ) {
+ worklist.push(use);
+ }
+ }
+ while( worklist.size() ) {
+ Node *use = worklist.pop();
+ for (uint j = 1; j < use->req(); j++) {
+ Node* n = use->in(j);
+ if (n == def) {
+
+ // "def" is in peel set, "use" is not in peel set
+ // or "use" is in the entry boundary (a phi) of the peel set
+
+ Node* use_c = has_ctrl(use) ? get_ctrl(use) : use;
+
+ if ( loop->is_member(get_loop( use_c )) ) {
+ // use is in loop
+ if (old_new[use->_idx] != NULL) { // null for dead code
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use);
+ use->set_req(j, C->top());
+ _igvn._worklist.push(use);
+ insert_phi_for_loop( use_clone, j, old_new[def->_idx], def, new_head_clone );
+ }
+ } else {
+ assert(is_valid_clone_loop_exit_use(loop, use, orig_exit_idx), "clone loop format");
+ // use is not in the loop, check if the live range includes the cut
+ Node* lp_if = use_c->in(orig_exit_idx)->in(0);
+ if (not_peel.test(lp_if->_idx)) {
+ assert(j == orig_exit_idx, "use from original loop");
+ insert_phi_for_loop( use, clone_exit_idx, old_new[def->_idx], def, new_head_clone );
+ }
+ }
+ }
+ }
+ }
+ }
+ }
+
+ // Step 3b: retarget control
+
+ // Redirect control to the new loop head if a cloned node in
+ // the not_peeled region has control that points into the peeled region.
+  // This is necessary because the cloned peeled region will be outside
+ // the loop.
+ // from to
+ // cloned-peeled <---+
+ // new_head_clone: | <--+
+ // cloned-not_peeled in(0) in(0)
+ // orig-peeled
+
+ for(i = 0; i < loop->_body.size(); i++ ) {
+ Node *n = loop->_body.at(i);
+ if (!n->is_CFG() && n->in(0) != NULL &&
+ not_peel.test(n->_idx) && peel.test(n->in(0)->_idx)) {
+ Node* n_clone = old_new[n->_idx];
+ _igvn.hash_delete(n_clone);
+ n_clone->set_req(0, new_head_clone);
+ _igvn._worklist.push(n_clone);
+ }
+ }
+
+ // Backedge of the surviving new_head (the clone) is original last_peel
+ _igvn.hash_delete(new_head_clone);
+ new_head_clone->set_req(LoopNode::LoopBackControl, last_peel);
+ _igvn._worklist.push(new_head_clone);
+
+ // Cut first node in original not_peel set
+ _igvn.hash_delete(new_head);
+ new_head->set_req(LoopNode::EntryControl, C->top());
+ new_head->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(new_head);
+
+ // Copy head_clone back-branch info to original head
+ // and remove original head's loop entry and
+ // clone head's back-branch
+ _igvn.hash_delete(head);
+ _igvn.hash_delete(head_clone);
+ head->set_req(LoopNode::EntryControl, head_clone->in(LoopNode::LoopBackControl));
+ head->set_req(LoopNode::LoopBackControl, C->top());
+ head_clone->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(head);
+ _igvn._worklist.push(head_clone);
+
+ // Similarly modify the phis
+ for (DUIterator_Fast kmax, k = head->fast_outs(kmax); k < kmax; k++) {
+ Node* use = head->fast_out(k);
+ if (use->is_Phi() && use->outcnt() > 0) {
+ Node* use_clone = old_new[use->_idx];
+ _igvn.hash_delete(use);
+ _igvn.hash_delete(use_clone);
+ use->set_req(LoopNode::EntryControl, use_clone->in(LoopNode::LoopBackControl));
+ use->set_req(LoopNode::LoopBackControl, C->top());
+ use_clone->set_req(LoopNode::LoopBackControl, C->top());
+ _igvn._worklist.push(use);
+ _igvn._worklist.push(use_clone);
+ }
+ }
+
+ // Step 4: update dominator tree and dominator depth
+
+ set_idom(head, orig_tail_clone, dd);
+ recompute_dom_depth();
+
+ // Inhibit more partial peeling on this loop
+ new_head_clone->set_partial_peel_loop();
+ C->set_major_progress();
+
+#if !defined(PRODUCT)
+ if (TracePartialPeeling) {
+ tty->print_cr("\nafter partial peel one iteration");
+ Node_List wl(area);
+ Node* t = last_peel;
+ while (true) {
+ wl.push(t);
+ if (t == head_clone) break;
+ t = idom(t);
+ }
+ while (wl.size() > 0) {
+ Node* tt = wl.pop();
+ if (tt == head) tty->print_cr("orig head");
+ else if (tt == new_head_clone) tty->print_cr("new head");
+ else if (tt == head_clone) tty->print_cr("clone head");
+ tt->dump();
+ }
+ }
+#endif
+ return true;
+}
+
+//------------------------------reorg_offsets----------------------------------
+// Reorganize offset computations to lower register pressure. Mostly
+// prevent loop-fallout uses of the pre-incremented trip counter (which are
+// then alive with the post-incremented trip counter forcing an extra
+// register move)
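+// For example, a fall-out use of the trip-counter phi is rewritten to use
+// AddI(Opaque2(incr), -stride), i.e. the post-incremented counter minus one
+// stride, so only the post-incremented value stays live out of the loop.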
+void PhaseIdealLoop::reorg_offsets( IdealLoopTree *loop ) {
+
+ CountedLoopNode *cl = loop->_head->as_CountedLoop();
+ CountedLoopEndNode *cle = cl->loopexit();
+ if( !cle ) return; // The occasional dead loop
+ // Find loop exit control
+ Node *exit = cle->proj_out(false);
+ assert( exit->Opcode() == Op_IfFalse, "" );
+
+ // Check for the special case of folks using the pre-incremented
+ // trip-counter on the fall-out path (forces the pre-incremented
+ // and post-incremented trip counter to be live at the same time).
+ // Fix this by adjusting to use the post-increment trip counter.
+ Node *phi = cl->phi();
+ if( !phi ) return; // Dead infinite loop
+ bool progress = true;
+ while (progress) {
+ progress = false;
+ for (DUIterator_Fast imax, i = phi->fast_outs(imax); i < imax; i++) {
+ Node* use = phi->fast_out(i); // User of trip-counter
+ if (!has_ctrl(use)) continue;
+ Node *u_ctrl = get_ctrl(use);
+ if( use->is_Phi() ) {
+ u_ctrl = NULL;
+ for( uint j = 1; j < use->req(); j++ )
+ if( use->in(j) == phi )
+ u_ctrl = dom_lca( u_ctrl, use->in(0)->in(j) );
+ }
+ IdealLoopTree *u_loop = get_loop(u_ctrl);
+ // Look for loop-invariant use
+ if( u_loop == loop ) continue;
+ if( loop->is_member( u_loop ) ) continue;
+ // Check that use is live out the bottom. Assuming the trip-counter
+    // update is right at the bottom, uses of the loop middle are ok.
+ if( dom_lca( exit, u_ctrl ) != exit ) continue;
+ // protect against stride not being a constant
+ if( !cle->stride_is_con() ) continue;
+ // Hit! Refactor use to use the post-incremented tripcounter.
+ // Compute a post-increment tripcounter.
+ Node *opaq = new (C, 2) Opaque2Node( cle->incr() );
+ register_new_node( opaq, u_ctrl );
+ Node *neg_stride = _igvn.intcon(-cle->stride_con());
+ set_ctrl(neg_stride, C->root());
+ Node *post = new (C, 3) AddINode( opaq, neg_stride);
+ register_new_node( post, u_ctrl );
+ _igvn.hash_delete(use);
+ _igvn._worklist.push(use);
+ for( uint j = 1; j < use->req(); j++ )
+ if( use->in(j) == phi )
+ use->set_req(j, post);
+ // Since DU info changed, rerun loop
+ progress = true;
+ break;
+ }
+ }
+
+}
diff --git a/src/share/vm/opto/machnode.cpp b/src/share/vm/opto/machnode.cpp
new file mode 100644
index 000000000..8b88f00b5
--- /dev/null
+++ b/src/share/vm/opto/machnode.cpp
@@ -0,0 +1,707 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_machnode.cpp.incl"
+
+//=============================================================================
+// Return the value requested
+// result register lookup, corresponding to int_format
+int MachOper::reg(PhaseRegAlloc *ra_, const Node *node) const {
+ return (int)ra_->get_encode(node);
+}
+// input register lookup, corresponding to ext_format
+int MachOper::reg(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return (int)(ra_->get_encode(node->in(idx)));
+}
+intptr_t MachOper::constant() const { return 0x00; }
+bool MachOper::constant_is_oop() const { return false; }
+jdouble MachOper::constantD() const { ShouldNotReachHere(); return 0.0; }
+jfloat MachOper::constantF() const { ShouldNotReachHere(); return 0.0; }
+jlong MachOper::constantL() const { ShouldNotReachHere(); return CONST64(0) ; }
+TypeOopPtr *MachOper::oop() const { return NULL; }
+int MachOper::ccode() const { return 0x00; }
+// A zero (the default) indicates this value is not needed.
+// May need to look up the base register, as done in int_ and ext_format
+int MachOper::base (PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::index(PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::scale() const { return 0x00; }
+int MachOper::disp (PhaseRegAlloc *ra_, const Node *node, int idx) const { return 0x00; }
+int MachOper::constant_disp() const { return 0; }
+int MachOper::base_position() const { return -1; } // no base input
+int MachOper::index_position() const { return -1; } // no index input
+// Check for PC-Relative displacement
+bool MachOper::disp_is_oop() const { return false; }
+// Return the label
+Label* MachOper::label() const { ShouldNotReachHere(); return 0; }
+intptr_t MachOper::method() const { ShouldNotReachHere(); return 0; }
+
+
+//------------------------------negate-----------------------------------------
+// Negate conditional branches. Error for non-branch operands
+void MachOper::negate() {
+ ShouldNotCallThis();
+}
+
+//-----------------------------type--------------------------------------------
+const Type *MachOper::type() const {
+ return Type::BOTTOM;
+}
+
+//------------------------------in_RegMask-------------------------------------
+const RegMask *MachOper::in_RegMask(int index) const {
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//------------------------------dump_spec--------------------------------------
+// Print any per-operand special info
+#ifndef PRODUCT
+void MachOper::dump_spec(outputStream *st) const { }
+#endif
+
+//------------------------------hash-------------------------------------------
+// Hash any per-operand special info
+uint MachOper::hash() const {
+ ShouldNotCallThis();
+ return 5;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare any per-operand special info
+uint MachOper::cmp( const MachOper &oper ) const {
+ ShouldNotCallThis();
+ return opcode() == oper.opcode();
+}
+
+//------------------------------hash-------------------------------------------
+// Hash on the label's block number
+uint labelOper::hash() const {
+ return _block_num;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare label operands for equality
+uint labelOper::cmp( const MachOper &oper ) const {
+ return (opcode() == oper.opcode()) && (_label == oper.label());
+}
+
+//------------------------------hash-------------------------------------------
+// Hash on the method address
+uint methodOper::hash() const {
+ return (uint)_method;
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare method operands for equality
+uint methodOper::cmp( const MachOper &oper ) const {
+ return (opcode() == oper.opcode()) && (_method == oper.method());
+}
+
+
+//=============================================================================
+//------------------------------MachNode---------------------------------------
+
+//------------------------------emit-------------------------------------------
+void MachNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ #ifdef ASSERT
+ tty->print("missing MachNode emit function: ");
+ dump();
+ #endif
+ ShouldNotCallThis();
+}
+
+//------------------------------size-------------------------------------------
+// Size of instruction in bytes
+uint MachNode::size(PhaseRegAlloc *ra_) const {
+ // If a virtual was not defined for this specific instruction,
+  // call the helper which finds the size by emitting the bits.
+ return MachNode::emit_size(ra_);
+}
+
+//------------------------------size-------------------------------------------
+// Helper function that computes size by emitting code
+uint MachNode::emit_size(PhaseRegAlloc *ra_) const {
+ // Emit into a trash buffer and count bytes emitted.
+ assert(ra_ == ra_->C->regalloc(), "sanity");
+ return ra_->C->scratch_emit_size(this);
+}
+
+
+
+//------------------------------hash-------------------------------------------
+uint MachNode::hash() const {
+ uint no = num_opnds();
+ uint sum = rule();
+ for( uint i=0; i<no; i++ )
+ sum += _opnds[i]->hash();
+ return sum+Node::hash();
+}
+
+//-----------------------------cmp---------------------------------------------
+uint MachNode::cmp( const Node &node ) const {
+ MachNode& n = *((Node&)node).as_Mach();
+ uint no = num_opnds();
+ if( no != n.num_opnds() ) return 0;
+ if( rule() != n.rule() ) return 0;
+ for( uint i=0; i<no; i++ ) // All operands must match
+ if( !_opnds[i]->cmp( *n._opnds[i] ) )
+ return 0; // mis-matched operands
+ return 1; // match
+}
+
+// Return an equivalent instruction using memory for cisc_operand position
+MachNode *MachNode::cisc_version(int offset, Compile* C) {
+ ShouldNotCallThis();
+ return NULL;
+}
+
+void MachNode::use_cisc_RegMask() {
+ ShouldNotReachHere();
+}
+
+
+//-----------------------------in_RegMask--------------------------------------
+const RegMask &MachNode::in_RegMask( uint idx ) const {
+ uint numopnds = num_opnds(); // Virtual call for number of operands
+ uint skipped = oper_input_base(); // Sum of leaves skipped so far
+ if( idx < skipped ) {
+ assert( ideal_Opcode() == Op_AddP, "expected base ptr here" );
+ assert( idx == 1, "expected base ptr here" );
+ // debug info can be anywhere
+ return *Compile::current()->matcher()->idealreg2spillmask[Op_RegP];
+ }
+ uint opcnt = 1; // First operand
+ uint num_edges = _opnds[1]->num_edges(); // leaves for first operand
+ while( idx >= skipped+num_edges ) {
+ skipped += num_edges;
+ opcnt++; // Bump operand count
+ assert( opcnt < numopnds, "Accessing non-existent operand" );
+ num_edges = _opnds[opcnt]->num_edges(); // leaves for next operand
+ }
+
+ const RegMask *rm = cisc_RegMask();
+ if( rm == NULL || (int)opcnt != cisc_operand() ) {
+ rm = _opnds[opcnt]->in_RegMask(idx-skipped);
+ }
+ return *rm;
+}
+
+//-----------------------------memory_inputs--------------------------------
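+// Return this node's unique memory operand, if any, and set "base" and
+// "index" to the corresponding input nodes.  Returns NULL when there is
+// no memory operand, and (MachOper*)-1 (with base and index set to
+// NodeSentinel) when the memory inputs are not unique.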
+const MachOper* MachNode::memory_inputs(Node* &base, Node* &index) const {
+ const MachOper* oper = memory_operand();
+
+ if (oper == (MachOper*)-1) {
+ base = NodeSentinel;
+ index = NodeSentinel;
+ } else {
+ base = NULL;
+ index = NULL;
+ if (oper != NULL) {
+ // It has a unique memory operand. Find its index.
+ int oper_idx = num_opnds();
+ while (--oper_idx >= 0) {
+ if (_opnds[oper_idx] == oper) break;
+ }
+ int oper_pos = operand_index(oper_idx);
+ int base_pos = oper->base_position();
+ if (base_pos >= 0) {
+ base = _in[oper_pos+base_pos];
+ }
+ int index_pos = oper->index_position();
+ if (index_pos >= 0) {
+ index = _in[oper_pos+index_pos];
+ }
+ }
+ }
+
+ return oper;
+}
+
+//-----------------------------get_base_and_disp----------------------------
+const Node* MachNode::get_base_and_disp(intptr_t &offset, const TypePtr* &adr_type) const {
+
+ // Find the memory inputs using our helper function
+ Node* base;
+ Node* index;
+ const MachOper* oper = memory_inputs(base, index);
+
+ if (oper == NULL) {
+ // Base has been set to NULL
+ offset = 0;
+ } else if (oper == (MachOper*)-1) {
+ // Base has been set to NodeSentinel
+ // There is not a unique memory use here. We will fall to AliasIdxBot.
+ offset = Type::OffsetBot;
+ } else {
+ // Base may be NULL, even if offset turns out to be != 0
+
+ intptr_t disp = oper->constant_disp();
+ int scale = oper->scale();
+ // Now we have collected every part of the ADLC MEMORY_INTER.
+ // See if it adds up to a base + offset.
+ if (index != NULL) {
+ if (!index->is_Con()) {
+ disp = Type::OffsetBot;
+ } else if (disp != Type::OffsetBot) {
+ const TypeX* ti = index->bottom_type()->isa_intptr_t();
+ if (ti == NULL) {
+ disp = Type::OffsetBot; // a random constant??
+ } else {
+ disp += ti->get_con() << scale;
+ }
+ }
+ }
+ offset = disp;
+
+ // In i486.ad, indOffset32X uses base==RegI and disp==RegP,
+ // this will prevent alias analysis without the following support:
+ // Lookup the TypePtr used by indOffset32X, a compile-time constant oop,
+ // Add the offset determined by the "base", or use Type::OffsetBot.
+ if( adr_type == TYPE_PTR_SENTINAL ) {
+ const TypePtr *t_disp = oper->disp_as_type(); // only !NULL for indOffset32X
+ if (t_disp != NULL) {
+ offset = Type::OffsetBot;
+ const Type* t_base = base->bottom_type();
+ if (t_base->isa_intptr_t()) {
+ const TypeX *t_offset = t_base->is_intptr_t();
+ if( t_offset->is_con() ) {
+ offset = t_offset->get_con();
+ }
+ }
+ adr_type = t_disp->add_offset(offset);
+ }
+ }
+
+ }
+ return base;
+}
+
+
+//---------------------------------adr_type---------------------------------
+const class TypePtr *MachNode::adr_type() const {
+ intptr_t offset = 0;
+ const TypePtr *adr_type = TYPE_PTR_SENTINAL; // attempt computing adr_type
+ const Node *base = get_base_and_disp(offset, adr_type);
+ if( adr_type != TYPE_PTR_SENTINAL ) {
+ return adr_type; // get_base_and_disp has the answer
+ }
+
+ // Direct addressing modes have no base node, simply an indirect
+ // offset, which is always to raw memory.
+ // %%%%% Someday we'd like to allow constant oop offsets which
+ // would let Intel load from static globals in 1 instruction.
+ // Currently Intel requires 2 instructions and a register temp.
+ if (base == NULL) {
+ // NULL base, zero offset means no memory at all (a null pointer!)
+ if (offset == 0) {
+ return NULL;
+ }
+ // NULL base, any offset means any pointer whatever
+ if (offset == Type::OffsetBot) {
+ return TypePtr::BOTTOM;
+ }
+ // %%% make offset be intptr_t
+ assert(!Universe::heap()->is_in_reserved((oop)offset), "must be a raw ptr");
+ return TypeRawPtr::BOTTOM;
+ }
+
+ // base of -1 with no particular offset means all of memory
+ if (base == NodeSentinel) return TypePtr::BOTTOM;
+
+ const Type* t = base->bottom_type();
+ if (t->isa_intptr_t() && offset != 0 && offset != Type::OffsetBot) {
+ // We cannot assert that the offset does not look oop-ish here.
+ // Depending on the heap layout the cardmark base could land
+ // inside some oopish region. It definitely does for Win2K.
+ // The sum of cardmark-base plus shift-by-9-oop lands outside
+ // the oop-ish area but we can't assert for that statically.
+ return TypeRawPtr::BOTTOM;
+ }
+
+ const TypePtr *tp = t->isa_ptr();
+
+ // be conservative if we do not recognize the type
+ if (tp == NULL) {
+ return TypePtr::BOTTOM;
+ }
+ assert(tp->base() != Type::AnyPtr, "not a bare pointer");
+
+ return tp->add_offset(offset);
+}
+
+
+//-----------------------------operand_index---------------------------------
+int MachNode::operand_index( uint operand ) const {
+ if( operand < 1 ) return -1;
+ assert(operand < num_opnds(), "oob");
+ if( _opnds[operand]->num_edges() == 0 ) return -1;
+
+ uint skipped = oper_input_base(); // Sum of leaves skipped so far
+ for (uint opcnt = 1; opcnt < operand; opcnt++) {
+ uint num_edges = _opnds[opcnt]->num_edges(); // leaves for operand
+ skipped += num_edges;
+ }
+ return skipped;
+}
+
+
+//------------------------------negate-----------------------------------------
+// Negate conditional branches. Error for non-branch Nodes
+void MachNode::negate() {
+ ShouldNotCallThis();
+}
+
+//------------------------------peephole---------------------------------------
+// Apply peephole rule(s) to this instruction
+MachNode *MachNode::peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C ) {
+ return NULL;
+}
+
+//------------------------------add_case_label---------------------------------
+// Adds the label for the case
+void MachNode::add_case_label( int index_num, Label* blockLabel) {
+ ShouldNotCallThis();
+}
+
+//------------------------------label_set--------------------------------------
+// Set the Label for a LabelOper, if an operand for this instruction
+void MachNode::label_set( Label& label, uint block_num ) {
+ ShouldNotCallThis();
+}
+
+//------------------------------method_set-------------------------------------
+// Set the absolute address of a method
+void MachNode::method_set( intptr_t addr ) {
+ ShouldNotCallThis();
+}
+
+//------------------------------rematerialize----------------------------------
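+// Should this node be rematerialized (recomputed at each use) by the
+// register allocator rather than spilled?  The heuristics below weigh the
+// cost of recomputation against stretching input live ranges.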
+bool MachNode::rematerialize() const {
+ // Temps are always rematerializable
+ if (is_MachTemp()) return true;
+
+ uint r = rule(); // Match rule
+ if( r < Matcher::_begin_rematerialize ||
+ r >= Matcher::_end_rematerialize )
+ return false;
+
+ // For 2-address instructions, the input live range is also the output
+  // live range. Rematerializing does not make progress on that live range.
+ if( two_adr() ) return false;
+
+ // Check for rematerializing float constants, or not
+ if( !Matcher::rematerialize_float_constants ) {
+ int op = ideal_Opcode();
+ if( op == Op_ConF || op == Op_ConD )
+ return false;
+ }
+
+  // Defining flags - can't spill these! Must rematerialize.
+ if( ideal_reg() == Op_RegFlags )
+ return true;
+
+ // Stretching lots of inputs - don't do it.
+ if( req() > 2 )
+ return false;
+
+  // Don't rematerialize somebody with bound inputs - it stretches a
+ // fixed register lifetime.
+ uint idx = oper_input_base();
+ if( req() > idx ) {
+ const RegMask &rm = in_RegMask(idx);
+ if( rm.is_bound1() || rm.is_bound2() )
+ return false;
+ }
+
+ return true;
+}
+
+#ifndef PRODUCT
+//------------------------------dump_spec--------------------------------------
+// Print any per-operand special info
+void MachNode::dump_spec(outputStream *st) const {
+ uint cnt = num_opnds();
+ for( uint i=0; i<cnt; i++ )
+ _opnds[i]->dump_spec(st);
+ const TypePtr *t = adr_type();
+ if( t ) {
+ Compile* C = Compile::current();
+ if( C->alias_type(t)->is_volatile() )
+ st->print(" Volatile!");
+ }
+}
+
+//------------------------------dump_format------------------------------------
+// access to virtual
+void MachNode::dump_format(PhaseRegAlloc *ra, outputStream *st) const {
+ format(ra, st); // access to virtual
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachTypeNode::dump_spec(outputStream *st) const {
+ _bottom_type->dump_on(st);
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachNullCheckNode::format( PhaseRegAlloc *ra_, outputStream *st ) const {
+ int reg = ra_->get_reg_first(in(1)->in(_vidx));
+ tty->print("%s %s", Name(), Matcher::regName[reg]);
+}
+#endif
+
+void MachNullCheckNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
+ // only emits entries in the null-pointer exception handler table
+}
+
+const RegMask &MachNullCheckNode::in_RegMask( uint idx ) const {
+ if( idx == 0 ) return RegMask::Empty;
+ else return in(1)->as_Mach()->out_RegMask();
+}
+
+//=============================================================================
+const Type *MachProjNode::bottom_type() const {
+ if( _ideal_reg == fat_proj ) return Type::BOTTOM;
+ // Try the normal mechanism first
+ const Type *t = in(0)->bottom_type();
+ if( t->base() == Type::Tuple ) {
+ const TypeTuple *tt = t->is_tuple();
+ if (_con < tt->cnt())
+ return tt->field_at(_con);
+ }
+ // Else use generic type from ideal register set
+ assert((uint)_ideal_reg < (uint)_last_machine_leaf && Type::mreg2type[_ideal_reg], "in bounds");
+ return Type::mreg2type[_ideal_reg];
+}
+
+const TypePtr *MachProjNode::adr_type() const {
+ if (bottom_type() == Type::MEMORY) {
+ // in(0) might be a narrow MemBar; otherwise we will report TypePtr::BOTTOM
+ const TypePtr* adr_type = in(0)->adr_type();
+ #ifdef ASSERT
+ if (!is_error_reported() && !Node::in_dump())
+ assert(adr_type != NULL, "source must have adr_type");
+ #endif
+ return adr_type;
+ }
+ assert(bottom_type()->base() != Type::Memory, "no other memories?");
+ return NULL;
+}
+
+#ifndef PRODUCT
+void MachProjNode::dump_spec(outputStream *st) const {
+ ProjNode::dump_spec(st);
+ switch (_ideal_reg) {
+ case unmatched_proj: st->print("/unmatched"); break;
+ case fat_proj: st->print("/fat"); if (WizardMode) _rout.dump(); break;
+ }
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachIfNode::dump_spec(outputStream *st) const {
+ st->print("P=%f, C=%f",_prob, _fcnt);
+}
+#endif
+
+//=============================================================================
+uint MachReturnNode::size_of() const { return sizeof(*this); }
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachReturnNode::in_RegMask( uint idx ) const {
+ return _in_rms[idx];
+}
+
+const TypePtr *MachReturnNode::adr_type() const {
+ // most returns and calls are assumed to consume & modify all of memory
+ // the matcher will copy non-wide adr_types from ideal originals
+ return _adr_type;
+}
+
+//=============================================================================
+const Type *MachSafePointNode::bottom_type() const { return TypeTuple::MEMBAR; }
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachSafePointNode::in_RegMask( uint idx ) const {
+  // Values in the domain use the user's calling convention, embodied in the
+ // _in_rms array of RegMasks.
+ if( idx < TypeFunc::Parms ) return _in_rms[idx];
+
+ if (SafePointNode::needs_polling_address_input() &&
+ idx == TypeFunc::Parms &&
+ ideal_Opcode() == Op_SafePoint) {
+ return MachNode::in_RegMask(idx);
+ }
+
+ // Values outside the domain represent debug info
+ return *Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()];
+}
+
+
+//=============================================================================
+
+uint MachCallNode::cmp( const Node &n ) const
+{ return _tf == ((MachCallNode&)n)._tf; }
+const Type *MachCallNode::bottom_type() const { return tf()->range(); }
+const Type *MachCallNode::Value(PhaseTransform *phase) const { return tf()->range(); }
+
+#ifndef PRODUCT
+void MachCallNode::dump_spec(outputStream *st) const {
+ st->print("# ");
+ tf()->dump_on(st);
+ if (_cnt != COUNT_UNKNOWN) st->print(" C=%f",_cnt);
+ if (jvms() != NULL) jvms()->dump_spec(st);
+}
+#endif
+
+
+bool MachCallNode::return_value_is_used() const {
+ if (tf()->range()->cnt() == TypeFunc::Parms) {
+ // void return
+ return false;
+ }
+
+ // find the projection corresponding to the return value
+ for (DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++) {
+ Node *use = fast_out(i);
+ if (!use->is_Proj()) continue;
+ if (use->as_Proj()->_con == TypeFunc::Parms) {
+ return true;
+ }
+ }
+ return false;
+}
+
+
+//------------------------------Registers--------------------------------------
+const RegMask &MachCallNode::in_RegMask( uint idx ) const {
+  // Values in the domain use the user's calling convention, embodied in the
+ // _in_rms array of RegMasks.
+ if (idx < tf()->domain()->cnt()) return _in_rms[idx];
+ // Values outside the domain represent debug info
+ return *Compile::current()->matcher()->idealreg2debugmask[in(idx)->ideal_reg()];
+}
+
+//=============================================================================
+uint MachCallJavaNode::size_of() const { return sizeof(*this); }
+uint MachCallJavaNode::cmp( const Node &n ) const {
+ MachCallJavaNode &call = (MachCallJavaNode&)n;
+ return MachCallNode::cmp(call) && _method->equals(call._method);
+}
+#ifndef PRODUCT
+void MachCallJavaNode::dump_spec(outputStream *st) const {
+ if( _method ) {
+ _method->print_short_name(st);
+ st->print(" ");
+ }
+ MachCallNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+uint MachCallStaticJavaNode::size_of() const { return sizeof(*this); }
+uint MachCallStaticJavaNode::cmp( const Node &n ) const {
+ MachCallStaticJavaNode &call = (MachCallStaticJavaNode&)n;
+ return MachCallJavaNode::cmp(call) && _name == call._name;
+}
+
+//----------------------------uncommon_trap_request----------------------------
+// If this is an uncommon trap, return the request code, else zero.
+int MachCallStaticJavaNode::uncommon_trap_request() const {
+ if (_name != NULL && !strcmp(_name, "uncommon_trap")) {
+ return CallStaticJavaNode::extract_uncommon_trap_request(this);
+ }
+ return 0;
+}
+
+#ifndef PRODUCT
+// Helper for summarizing uncommon_trap arguments.
+void MachCallStaticJavaNode::dump_trap_args(outputStream *st) const {
+ int trap_req = uncommon_trap_request();
+ if (trap_req != 0) {
+ char buf[100];
+ st->print("(%s)",
+ Deoptimization::format_trap_request(buf, sizeof(buf),
+ trap_req));
+ }
+}
+
+void MachCallStaticJavaNode::dump_spec(outputStream *st) const {
+ st->print("Static ");
+ if (_name != NULL) {
+ st->print("wrapper for: %s", _name );
+ dump_trap_args(st);
+ st->print(" ");
+ }
+ MachCallJavaNode::dump_spec(st);
+}
+#endif
+
+//=============================================================================
+#ifndef PRODUCT
+void MachCallDynamicJavaNode::dump_spec(outputStream *st) const {
+ st->print("Dynamic ");
+ MachCallJavaNode::dump_spec(st);
+}
+#endif
+//=============================================================================
+uint MachCallRuntimeNode::size_of() const { return sizeof(*this); }
+uint MachCallRuntimeNode::cmp( const Node &n ) const {
+ MachCallRuntimeNode &call = (MachCallRuntimeNode&)n;
+ return MachCallNode::cmp(call) && !strcmp(_name,call._name);
+}
+#ifndef PRODUCT
+void MachCallRuntimeNode::dump_spec(outputStream *st) const {
+ st->print("%s ",_name);
+ MachCallNode::dump_spec(st);
+}
+#endif
+//=============================================================================
+// A shared JVMState for all HaltNodes. Indicates the start of debug info
+// is at TypeFunc::Parms. Only required for SOE register spill handling -
+// to indicate where the stack-slot-only debug info inputs begin.
+// There is no other JVM state needed here.
+JVMState jvms_for_throw(0);
+JVMState *MachHaltNode::jvms() const {
+ return &jvms_for_throw;
+}
+
+//=============================================================================
+#ifndef PRODUCT
+void labelOper::int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const {
+ st->print("B%d", _block_num);
+}
+#endif // PRODUCT
+
+//=============================================================================
+#ifndef PRODUCT
+void methodOper::int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const {
+ st->print(INTPTR_FORMAT, _method);
+}
+#endif // PRODUCT
diff --git a/src/share/vm/opto/machnode.hpp b/src/share/vm/opto/machnode.hpp
new file mode 100644
index 000000000..3c24a3e5c
--- /dev/null
+++ b/src/share/vm/opto/machnode.hpp
@@ -0,0 +1,826 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class BufferBlob;
+class CodeBuffer;
+class JVMState;
+class MachCallDynamicJavaNode;
+class MachCallJavaNode;
+class MachCallLeafNode;
+class MachCallNode;
+class MachCallRuntimeNode;
+class MachCallStaticJavaNode;
+class MachEpilogNode;
+class MachIfNode;
+class MachNullCheckNode;
+class MachOper;
+class MachProjNode;
+class MachPrologNode;
+class MachReturnNode;
+class MachSafePointNode;
+class MachSpillCopyNode;
+class Matcher;
+class PhaseRegAlloc;
+class RegMask;
+class State;
+
+//---------------------------MachOper------------------------------------------
+class MachOper : public ResourceObj {
+public:
+ // Allocate right next to the MachNodes in the same arena
+ void *operator new( size_t x, Compile* C ) { return C->node_arena()->Amalloc_D(x); }
+
+ // Opcode
+ virtual uint opcode() const = 0;
+
+ // Number of input edges.
+ // Generally at least 1
+ virtual uint num_edges() const { return 1; }
+ // Array of Register masks
+ virtual const RegMask *in_RegMask(int index) const;
+
+ // Methods to output the encoding of the operand
+
+ // Negate conditional branches. Error for non-branch Nodes
+ virtual void negate();
+
+ // Return the value requested
+ // result register lookup, corresponding to int_format
+ virtual int reg(PhaseRegAlloc *ra_, const Node *node) const;
+ // input register lookup, corresponding to ext_format
+ virtual int reg(PhaseRegAlloc *ra_, const Node *node, int idx) const;
+
+ // helpers for MacroAssembler generation from ADLC
+ Register as_Register(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_Register(reg(ra_, node));
+ }
+ Register as_Register(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_Register(reg(ra_, node, idx));
+ }
+ FloatRegister as_FloatRegister(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_FloatRegister(reg(ra_, node));
+ }
+ FloatRegister as_FloatRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_FloatRegister(reg(ra_, node, idx));
+ }
+
+#if defined(IA32) || defined(AMD64)
+ XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node) const {
+ return ::as_XMMRegister(reg(ra_, node));
+ }
+ XMMRegister as_XMMRegister(PhaseRegAlloc *ra_, const Node *node, int idx) const {
+ return ::as_XMMRegister(reg(ra_, node, idx));
+ }
+#endif
+
+ virtual intptr_t constant() const;
+ virtual bool constant_is_oop() const;
+ virtual jdouble constantD() const;
+ virtual jfloat constantF() const;
+ virtual jlong constantL() const;
+ virtual TypeOopPtr *oop() const;
+ virtual int ccode() const;
+ // A zero (the default) indicates this value is not needed.
+ // May need to look up the base register, as done in int_ and ext_format
+ virtual int base (PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ virtual int index(PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ virtual int scale() const;
+ // Parameters needed to support MEMORY_INTERFACE access to stackSlot
+ virtual int disp (PhaseRegAlloc *ra_, const Node *node, int idx) const;
+ // Check for PC-Relative displacement
+ virtual bool disp_is_oop() const;
+ virtual int constant_disp() const; // usu. 0, may return Type::OffsetBot
+ virtual int base_position() const; // base edge position, or -1
+ virtual int index_position() const; // index edge position, or -1
+
+ // Access the TypeKlassPtr of operands with a base==RegI and disp==RegP
+ // Only returns non-null value for i486.ad's indOffset32X
+ virtual const TypePtr *disp_as_type() const { return NULL; }
+
+ // Return the label
+ virtual Label *label() const;
+
+ // Return the method's address
+ virtual intptr_t method() const;
+
+ // Hash and compare over operands are currently identical
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+
+ // Virtual clone, since I do not know how big the MachOper is.
+ virtual MachOper *clone(Compile* C) const = 0;
+
+ // Return ideal Type from simple operands. Fail for complex operands.
+ virtual const Type *type() const;
+
+ // Set an integer offset if we have one, or error otherwise
+ virtual void set_con( jint c0 ) { ShouldNotReachHere(); }
+
+#ifndef PRODUCT
+ // Return name of operand
+ virtual const char *Name() const { return "???";}
+
+ // Methods to output the text version of the operand
+ virtual void int_format(PhaseRegAlloc *,const MachNode *node, outputStream *st) const = 0;
+ virtual void ext_format(PhaseRegAlloc *,const MachNode *node,int idx, outputStream *st) const=0;
+
+ virtual void dump_spec(outputStream *st) const; // Print per-operand info
+#endif
+};
+
+//------------------------------MachNode---------------------------------------
+// Base type for all machine specific nodes. All node classes generated by the
+// ADLC inherit from this class.
+class MachNode : public Node {
+public:
+ MachNode() : Node((uint)0), _num_opnds(0), _opnds(NULL) {
+ init_class_id(Class_Mach);
+ }
+ // Required boilerplate
+ virtual uint size_of() const { return sizeof(MachNode); }
+ virtual int Opcode() const; // Always equal to MachNode
+ virtual uint rule() const = 0; // Machine-specific opcode
+ // Number of inputs which come before the first operand.
+ // Generally at least 1, to skip the Control input
+ virtual uint oper_input_base() const { return 1; }
+
+ // Copy inputs and operands to new node of instruction.
+ // Called from cisc_version() and short_branch_version().
+ // !!!! The method's body is defined in the ad_<arch>.cpp file.
+ void fill_new_machnode(MachNode *n, Compile* C) const;
+
+ // Return an equivalent instruction using memory for cisc_operand position
+ virtual MachNode *cisc_version(int offset, Compile* C);
+ // Modify this instruction's register mask to use stack version for cisc_operand
+ virtual void use_cisc_RegMask();
+
+ // Support for short branches
+ virtual MachNode *short_branch_version(Compile* C) { return NULL; }
+ bool may_be_short_branch() const { return (flags() & Flag_may_be_short_branch) != 0; }
+
+ // First index in _in[] corresponding to operand, or -1 if there is none
+ int operand_index(uint operand) const;
+
+ // Register class input is expected in
+ virtual const RegMask &in_RegMask(uint) const;
+
+ // cisc-spillable instructions redefine for use by in_RegMask
+ virtual const RegMask *cisc_RegMask() const { return NULL; }
+
+ // If this instruction is a 2-address instruction, then return the
+ // index of the input which must match the output. Not necessary
+ // for instructions which bind the input and output register to the
+ // same singleton register (e.g., Intel IDIV, which binds AX to be
+ // both an input and an output). It is necessary when the input and
+ // output have choices - but they must use the same choice.
+ virtual uint two_adr( ) const { return 0; }
+
+ // Array of complex operand pointers. Each corresponds to zero or
+ // more leafs. Must be set by MachNode constructor to point to an
+ // internal array of MachOpers. The MachOper array is sized by
+ // specific MachNodes described in the ADL.
+ uint _num_opnds;
+ MachOper **_opnds;
+ uint num_opnds() const { return _num_opnds; }
+
+ // Emit bytes into cbuf
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ // Size of instruction in bytes
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ // Helper function that computes size by emitting code
+ virtual uint emit_size(PhaseRegAlloc *ra_) const;
+
+ // Return the alignment required (in units of relocInfo::addr_unit())
+ // for this instruction (must be a power of 2)
+ virtual int alignment_required() const { return 1; }
+
+ // Return the padding (in bytes) to be emitted before this
+ // instruction to properly align it.
+ virtual int compute_padding(int current_offset) const { return 0; }
+
+ // Return number of relocatable values contained in this instruction
+ virtual int reloc() const { return 0; }
+
+ // Return number of words used for double constants in this instruction
+ virtual int const_size() const { return 0; }
+
+ // Hash and compare over operands. Used to do GVN on machine Nodes.
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+
+ // Expand method for MachNode, replaces nodes representing pseudo
+ // instructions with a set of nodes which represent real machine
+ // instructions and compute the same value.
+ virtual MachNode *Expand( State *, Node_List &proj_list ) { return this; }
+
+ // Bottom_type call; value comes from operand0
+ virtual const class Type *bottom_type() const { return _opnds[0]->type(); }
+ virtual uint ideal_reg() const { const Type *t = _opnds[0]->type(); return t == TypeInt::CC ? Op_RegFlags : Matcher::base2reg[t->base()]; }
+
+ // If this is a memory op, return the base pointer and fixed offset.
+ // If there is no such address, return NULL. If there are multiple addresses
+ // or the address is indeterminate (rare cases) then return (Node*)-1,
+ // which serves as node bottom.
+ // If the offset is not statically determined, set it to Type::OffsetBot.
+ // This method is free to ignore stack slots if that helps.
+ #define TYPE_PTR_SENTINAL ((const TypePtr*)-1)
+ // Passing TYPE_PTR_SENTINAL as adr_type asks for computation of the adr_type if possible
+ const Node* get_base_and_disp(intptr_t &offset, const TypePtr* &adr_type) const;
+
+ // Helper for get_base_and_disp: find the base and index input nodes.
+ // Returns the MachOper as determined by memory_operand(), for use, if
+ // needed by the caller. If (MachOper *)-1 is returned, base and index
+ // are set to NodeSentinel. If (MachOper *) NULL is returned, base and
+ // index are set to NULL.
+ const MachOper* memory_inputs(Node* &base, Node* &index) const;
+
+ // Helper for memory_inputs: Which operand carries the necessary info?
+ // By default, returns NULL, which means there is no such operand.
+ // If it returns (MachOper*)-1, this means there are multiple memories.
+ virtual const MachOper* memory_operand() const { return NULL; }
+
+ // Call "get_base_and_disp" to decide which category of memory is used here.
+ virtual const class TypePtr *adr_type() const;
+
+ // Negate conditional branches. Error for non-branch Nodes
+ virtual void negate();
+
+ // Apply peephole rule(s) to this instruction
+ virtual MachNode *peephole( Block *block, int block_index, PhaseRegAlloc *ra_, int &deleted, Compile* C );
+
+ // Check for PC-Relative addressing
+ bool is_pc_relative() const { return (flags() & Flag_is_pc_relative) != 0; }
+
+ // Top-level ideal Opcode matched
+ virtual int ideal_Opcode() const { return Op_Node; }
+
+ // Set the branch inside jump MachNodes. Error for non-branch Nodes.
+ virtual void label_set( Label& label, uint block_num );
+
+ // Adds the label for the case
+ virtual void add_case_label( int switch_val, Label* blockLabel);
+
+ // Set the absolute address for methods
+ virtual void method_set( intptr_t addr );
+
+ // Should we clone rather than spill this instruction?
+ bool rematerialize() const;
+
+ // Get the pipeline info
+ static const Pipeline *pipeline_class();
+ virtual const Pipeline *pipeline() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const = 0; // Machine-specific name
+ virtual void dump_spec(outputStream *st) const; // Print per-node info
+ void dump_format(PhaseRegAlloc *ra, outputStream *st) const; // access to virtual
+#endif
+};
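+
+// Editor's note: the following compile-guarded sketch is NOT part of the
+// original sources. It only illustrates the sentinel protocol documented for
+// MachNode::get_base_and_disp() above: NULL means "not a memory op",
+// NodeSentinel ((Node*)-1) means the address is indeterminate or there are
+// several addresses, and Type::OffsetBot marks a non-constant displacement.
+// The guard macro is never defined, so none of this is compiled.
+#ifdef EDITOR_SKETCH_BASE_AND_DISP
+static void classify_memory_op(const MachNode* mach) {
+  intptr_t       offset   = 0;
+  const TypePtr* adr_type = TYPE_PTR_SENTINAL;  // ask for adr_type computation
+  const Node*    base     = mach->get_base_and_disp(offset, adr_type);
+  if (base == NULL) {
+    // Not a memory operation at all.
+  } else if (base == (const Node*)-1) {
+    // NodeSentinel: multiple addresses, or an indeterminate address.
+  } else if (offset == Type::OffsetBot) {
+    // Known base, but the displacement is not a compile-time constant.
+  } else {
+    // Known base and constant displacement.
+  }
+}
+#endif // EDITOR_SKETCH_BASE_AND_DISP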
+
+//------------------------------MachIdealNode----------------------------
+// Machine specific versions of nodes that must be defined by user.
+// These are not converted by matcher from ideal nodes to machine nodes
+// but are inserted into the code by the compiler.
+class MachIdealNode : public MachNode {
+public:
+ MachIdealNode( ) {}
+
+ // Define the following defaults for non-matched machine nodes
+ virtual uint oper_input_base() const { return 0; }
+ virtual uint rule() const { return 9999999; }
+ virtual const class Type *bottom_type() const { return _opnds == NULL ? Type::CONTROL : MachNode::bottom_type(); }
+};
+
+//------------------------------MachTypeNode----------------------------
+// Machine Nodes that need to retain a known Type.
+class MachTypeNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ const Type *_bottom_type;
+
+ virtual const class Type *bottom_type() const { return _bottom_type; }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachBreakpointNode----------------------------
+// Machine breakpoint or interrupt Node
+class MachBreakpointNode : public MachIdealNode {
+public:
+ MachBreakpointNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Breakpoint"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachUEPNode-----------------------------------
+// Machine Unvalidated Entry Point Node
+class MachUEPNode : public MachIdealNode {
+public:
+ MachUEPNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Unvalidated-Entry-Point"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachPrologNode--------------------------------
+// Machine function Prolog Node
+class MachPrologNode : public MachIdealNode {
+public:
+ MachPrologNode( ) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual int reloc() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Prolog"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachEpilogNode--------------------------------
+// Machine function Epilog Node
+class MachEpilogNode : public MachIdealNode {
+public:
+ MachEpilogNode(bool do_poll = false) : _do_polling(do_poll) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+ virtual int reloc() const;
+ virtual const Pipeline *pipeline() const;
+
+private:
+ bool _do_polling;
+
+public:
+ bool do_polling() const { return _do_polling; }
+
+ // Offset of safepoint from the beginning of the node
+ int safepoint_offset() const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Epilog"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachNopNode-----------------------------------
+// Machine function Nop Node
+class MachNopNode : public MachIdealNode {
+private:
+ int _count;
+public:
+ MachNopNode( ) : _count(1) {}
+ MachNopNode( int count ) : _count(count) {}
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+ virtual const class Type *bottom_type() const { return Type::CONTROL; }
+
+ virtual int ideal_Opcode() const { return Op_Con; } // bogus; see output.cpp
+ virtual const Pipeline *pipeline() const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Nop"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+ virtual void dump_spec(outputStream *st) const { } // No per-operand info
+#endif
+};
+
+//------------------------------MachSpillCopyNode------------------------------
+// Machine SpillCopy Node. Copies 1 or 2 words from any location to any
+// location (stack or register).
+class MachSpillCopyNode : public MachIdealNode {
+ const RegMask *_in; // RegMask for input
+ const RegMask *_out; // RegMask for output
+ const Type *_type;
+public:
+ MachSpillCopyNode( Node *n, const RegMask &in, const RegMask &out ) :
+ MachIdealNode(), _in(&in), _out(&out), _type(n->bottom_type()) {
+ init_class_id(Class_MachSpillCopy);
+ init_flags(Flag_is_Copy);
+ add_req(NULL);
+ add_req(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ void set_out_RegMask(const RegMask &out) { _out = &out; }
+ void set_in_RegMask(const RegMask &in) { _in = &in; }
+ virtual const RegMask &out_RegMask() const { return *_out; }
+ virtual const RegMask &in_RegMask(uint) const { return *_in; }
+ virtual const class Type *bottom_type() const { return _type; }
+ virtual uint ideal_reg() const { return Matcher::base2reg[_type->base()]; }
+ virtual uint oper_input_base() const { return 1; }
+ uint implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const;
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+#ifndef PRODUCT
+ virtual const char *Name() const { return "MachSpillCopy"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachNullChkNode--------------------------------
+// Machine-dependent null-pointer-check Node. Points to a real MachNode that is
+// also some kind of memory op. Turns the indicated MachNode into a
+// conditional branch with good latency on the ptr-not-null path and awful
+// latency on the pointer-is-null path.
+
+class MachNullCheckNode : public MachIdealNode {
+public:
+ const uint _vidx; // Index of memop being tested
+ MachNullCheckNode( Node *ctrl, Node *memop, uint vidx ) : MachIdealNode(), _vidx(vidx) {
+ init_class_id(Class_MachNullCheck);
+ init_flags(Flag_is_Branch | Flag_is_pc_relative);
+ add_req(ctrl);
+ add_req(memop);
+ }
+
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ virtual bool pinned() const { return true; };
+ virtual void negate() { }
+ virtual const class Type *bottom_type() const { return TypeTuple::IFBOTH; }
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual const RegMask &out_RegMask() const { return RegMask::Empty; }
+#ifndef PRODUCT
+ virtual const char *Name() const { return "NullCheck"; }
+ virtual void format( PhaseRegAlloc *, outputStream *st ) const;
+#endif
+};
+
+//------------------------------MachProjNode----------------------------------
+// Machine-dependent Ideal projections (how is that for an oxymoron). Really
+// just MachNodes made by the Ideal world that replicate simple projections
+// but with machine-dependent input & output register masks. Generally
+// produced as part of calling conventions. Normally I make MachNodes as part
+// of the Matcher process, but the Matcher is ill suited to issues involving
+// frame handling, so frame handling is all done in the Ideal world with
+// occasional callbacks to the machine model for important info.
+class MachProjNode : public ProjNode {
+public:
+ MachProjNode( Node *multi, uint con, const RegMask &out, uint ideal_reg ) : ProjNode(multi,con), _rout(out), _ideal_reg(ideal_reg) {}
+ RegMask _rout;
+ const uint _ideal_reg;
+ enum projType {
+ unmatched_proj = 0, // Projs for Control, I/O, memory not matched
+ fat_proj = 999 // Projs killing many regs, defined by _rout
+ };
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const;
+ virtual const RegMask &in_RegMask(uint) const { return RegMask::Empty; }
+ virtual const RegMask &out_RegMask() const { return _rout; }
+ virtual uint ideal_reg() const { return _ideal_reg; }
+ // Need size_of() for virtual ProjNode::clone()
+ virtual uint size_of() const { return sizeof(MachProjNode); }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachIfNode-------------------------------------
+// Machine-specific versions of IfNodes
+class MachIfNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ float _prob; // Probability branch goes either way
+ float _fcnt; // Frequency counter
+ MachIfNode() : MachNode() {
+ init_class_id(Class_MachIf);
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachFastLockNode-------------------------------------
+// Machine-specific versions of FastLockNodes
+class MachFastLockNode : public MachNode {
+ virtual uint size_of() const { return sizeof(*this); } // Size is bigger
+public:
+ BiasedLockingCounters* _counters;
+
+ MachFastLockNode() : MachNode() {}
+};
+
+//------------------------------MachReturnNode--------------------------------
+// Machine-specific versions of subroutine returns
+class MachReturnNode : public MachNode {
+ virtual uint size_of() const; // Size is bigger
+public:
+ RegMask *_in_rms; // Input register masks, set during allocation
+ ReallocMark _nesting; // assertion check for reallocations
+ const TypePtr* _adr_type; // memory effects of call or return
+ MachReturnNode() : MachNode() {
+ init_class_id(Class_MachReturn);
+ _adr_type = TypePtr::BOTTOM; // the default: all of memory
+ }
+
+ void set_adr_type(const TypePtr* atp) { _adr_type = atp; }
+
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual bool pinned() const { return true; };
+ virtual const TypePtr *adr_type() const;
+};
+
+//------------------------------MachSafePointNode-----------------------------
+// Machine-specific versions of safepoints
+class MachSafePointNode : public MachReturnNode {
+public:
+ OopMap* _oop_map; // Array of OopMap info (8-bit char) for GC
+ JVMState* _jvms; // Pointer to list of JVM State Objects
+ uint _jvmadj; // Extra delta to jvms indexes (mach. args)
+ OopMap* oop_map() const { return _oop_map; }
+ void set_oop_map(OopMap* om) { _oop_map = om; }
+
+ MachSafePointNode() : MachReturnNode(), _oop_map(NULL), _jvms(NULL), _jvmadj(0) {
+ init_class_id(Class_MachSafePoint);
+ init_flags(Flag_is_safepoint_node);
+ }
+
+ virtual JVMState* jvms() const { return _jvms; }
+ void set_jvms(JVMState* s) {
+ _jvms = s;
+ }
+ bool is_safepoint_node() const { return (flags() & Flag_is_safepoint_node) != 0; }
+ virtual const Type *bottom_type() const;
+
+ virtual const RegMask &in_RegMask(uint) const;
+
+ // Functionality from old debug nodes
+ Node *returnadr() const { return in(TypeFunc::ReturnAdr); }
+ Node *frameptr () const { return in(TypeFunc::FramePtr); }
+
+ Node *local(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->locoff() + idx);
+ }
+ Node *stack(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->stkoff() + idx);
+ }
+ Node *monitor_obj(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->monitor_obj_offset(idx));
+ }
+ Node *monitor_box(const JVMState* jvms, uint idx) const {
+ assert(verify_jvms(jvms), "jvms must match");
+ return in(_jvmadj + jvms->monitor_box_offset(idx));
+ }
+ void set_local(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->locoff() + idx, c);
+ }
+ void set_stack(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->stkoff() + idx, c);
+ }
+ void set_monitor(const JVMState* jvms, uint idx, Node *c) {
+ assert(verify_jvms(jvms), "jvms must match");
+ set_req(_jvmadj + jvms->monoff() + idx, c);
+ }
+};
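+
+// Editor's note: compile-guarded sketch, NOT part of the original sources.
+// It shows how the _jvmadj delta documented above offsets the ideal JVMState
+// indexes when reading debug info out of a MachSafePointNode. loc_size() is
+// assumed to be the usual JVMState accessor for the number of locals.
+// The guard macro is never defined, so this code is never built.
+#ifdef EDITOR_SKETCH_SAFEPOINT_DEBUG_INFO
+static void dump_live_locals(const MachSafePointNode* sfpt, outputStream* st) {
+  const JVMState* jvms = sfpt->jvms();
+  for (uint i = 0; i < jvms->loc_size(); i++) {
+    // local() reads in(_jvmadj + jvms->locoff() + i), i.e. the machine-level
+    // input carrying the i-th interpreter local at this safepoint.
+    Node* l = sfpt->local(jvms, i);
+    if (l != NULL) {
+      st->print("local[%u]: ", i);
+      l->dump_spec(st);
+      st->print(" ");
+    }
+  }
+}
+#endif // EDITOR_SKETCH_SAFEPOINT_DEBUG_INFO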
+
+//------------------------------MachCallNode----------------------------------
+// Machine-specific versions of subroutine calls
+class MachCallNode : public MachSafePointNode {
+protected:
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const = 0; // Size is bigger
+public:
+ const TypeFunc *_tf; // Function type
+ address _entry_point; // Address of the method being called
+ float _cnt; // Estimate of number of times called
+ uint _argsize; // Size of argument block on stack
+
+ const TypeFunc* tf() const { return _tf; }
+ const address entry_point() const { return _entry_point; }
+ const float cnt() const { return _cnt; }
+ uint argsize() const { return _argsize; }
+
+ void set_tf(const TypeFunc* tf) { _tf = tf; }
+ void set_entry_point(address p) { _entry_point = p; }
+ void set_cnt(float c) { _cnt = c; }
+ void set_argsize(int s) { _argsize = s; }
+
+ MachCallNode() : MachSafePointNode() {
+ init_class_id(Class_MachCall);
+ init_flags(Flag_is_Call);
+ }
+
+ virtual const Type *bottom_type() const;
+ virtual bool pinned() const { return false; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const RegMask &in_RegMask(uint) const;
+ virtual int ret_addr_offset() { return 0; }
+
+ bool returns_long() const { return tf()->return_type() == T_LONG; }
+ bool return_value_is_used() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallJavaNode------------------------------
+// "Base" class for machine-specific versions of subroutine calls
+class MachCallJavaNode : public MachCallNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ ciMethod* _method; // Method being direct called
+ int _bci; // Byte Code index of call byte code
+ bool _optimized_virtual; // Tells if node is a static call or an optimized virtual
+ MachCallJavaNode() : MachCallNode() {
+ init_class_id(Class_MachCallJava);
+ }
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallStaticJavaNode------------------------
+// Machine-specific versions of monomorphic subroutine calls
+class MachCallStaticJavaNode : public MachCallJavaNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const char *_name; // Runtime wrapper name
+ MachCallStaticJavaNode() : MachCallJavaNode() {
+ init_class_id(Class_MachCallStaticJava);
+ }
+
+ // If this is an uncommon trap, return the request code, else zero.
+ int uncommon_trap_request() const;
+
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+ void dump_trap_args(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallDynamicJavaNode------------------------
+// Machine-specific versions of possibly megamorphic subroutine calls
+class MachCallDynamicJavaNode : public MachCallJavaNode {
+public:
+ int _vtable_index;
+ MachCallDynamicJavaNode() : MachCallJavaNode() {
+ init_class_id(Class_MachCallDynamicJava);
+ DEBUG_ONLY(_vtable_index = -99); // throw an assert if uninitialized
+ }
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------MachCallRuntimeNode----------------------------
+// Machine-specific versions of subroutine calls
+class MachCallRuntimeNode : public MachCallNode {
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+public:
+ const char *_name; // Printable name, if _method is NULL
+ MachCallRuntimeNode() : MachCallNode() {
+ init_class_id(Class_MachCallRuntime);
+ }
+ virtual int ret_addr_offset();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class MachCallLeafNode: public MachCallRuntimeNode {
+public:
+ MachCallLeafNode() : MachCallRuntimeNode() {
+ init_class_id(Class_MachCallLeaf);
+ }
+};
+
+//------------------------------MachHaltNode-----------------------------------
+// Machine-specific versions of halt nodes
+class MachHaltNode : public MachReturnNode {
+public:
+ virtual JVMState* jvms() const;
+};
+
+
+//------------------------------MachTempNode-----------------------------------
+// Node used by the adlc to construct inputs to represent temporary registers
+class MachTempNode : public MachNode {
+private:
+ MachOper *_opnd_array[1];
+
+public:
+ virtual const RegMask &out_RegMask() const { return *_opnds[0]->in_RegMask(0); }
+ virtual uint rule() const { return 9999999; }
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+
+ MachTempNode(MachOper* oper) {
+ init_class_id(Class_MachTemp);
+ _num_opnds = 1;
+ _opnds = _opnd_array;
+ add_req(NULL);
+ _opnds[0] = oper;
+ }
+ virtual uint size_of() const { return sizeof(MachTempNode); }
+
+#ifndef PRODUCT
+ virtual void format(PhaseRegAlloc *, outputStream *st ) const {}
+ virtual const char *Name() const { return "MachTemp";}
+#endif
+};
+
+
+
+//------------------------------labelOper--------------------------------------
+// Machine-independent version of label operand
+class labelOper : public MachOper {
+private:
+ virtual uint num_edges() const { return 0; }
+public:
+ // Supported for fixed size branches
+ Label* _label; // Label for branch(es)
+
+ uint _block_num;
+
+ labelOper() : _label(0), _block_num(0) {}
+
+ labelOper(Label* label, uint block_num) : _label(label), _block_num(block_num) {}
+
+ labelOper(labelOper* l) : _label(l->_label) , _block_num(l->_block_num) {}
+
+ virtual MachOper *clone(Compile* C) const;
+
+ virtual Label *label() const { return _label; }
+
+ virtual uint opcode() const;
+
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Label";}
+
+ virtual void int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const;
+ virtual void ext_format(PhaseRegAlloc *ra, const MachNode *node, int idx, outputStream *st) const { int_format( ra, node, st ); }
+#endif
+};
+
+
+//------------------------------methodOper--------------------------------------
+// Machine-independent version of method operand
+class methodOper : public MachOper {
+private:
+ virtual uint num_edges() const { return 0; }
+public:
+ intptr_t _method; // Address of method
+ methodOper() : _method(0) {}
+ methodOper(intptr_t method) : _method(method) {}
+
+ virtual MachOper *clone(Compile* C) const;
+
+ virtual intptr_t method() const { return _method; }
+
+ virtual uint opcode() const;
+
+ virtual uint hash() const;
+ virtual uint cmp( const MachOper &oper ) const;
+#ifndef PRODUCT
+ virtual const char *Name() const { return "Method";}
+
+ virtual void int_format(PhaseRegAlloc *ra, const MachNode *node, outputStream *st) const;
+ virtual void ext_format(PhaseRegAlloc *ra, const MachNode *node, int idx, outputStream *st) const { int_format( ra, node, st ); }
+#endif
+};
diff --git a/src/share/vm/opto/macro.cpp b/src/share/vm/opto/macro.cpp
new file mode 100644
index 000000000..9ba4bc3d4
--- /dev/null
+++ b/src/share/vm/opto/macro.cpp
@@ -0,0 +1,995 @@
+/*
+ * Copyright 2005-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_macro.cpp.incl"
+
+
+//
+// Replace any references to "oldref" in inputs to "use" with "newref".
+// Returns the number of replacements made.
+//
+int PhaseMacroExpand::replace_input(Node *use, Node *oldref, Node *newref) {
+ int nreplacements = 0;
+ uint req = use->req();
+ for (uint j = 0; j < use->len(); j++) {
+ Node *uin = use->in(j);
+ if (uin == oldref) {
+ if (j < req)
+ use->set_req(j, newref);
+ else
+ use->set_prec(j, newref);
+ nreplacements++;
+ } else if (j >= req && uin == NULL) {
+ break;
+ }
+ }
+ return nreplacements;
+}
+
+void PhaseMacroExpand::copy_call_debug_info(CallNode *oldcall, CallNode * newcall) {
+ // Copy debug information and adjust JVMState information
+ uint old_dbg_start = oldcall->tf()->domain()->cnt();
+ uint new_dbg_start = newcall->tf()->domain()->cnt();
+ int jvms_adj = new_dbg_start - old_dbg_start;
+ assert (new_dbg_start == newcall->req(), "argument count mismatch");
+ for (uint i = old_dbg_start; i < oldcall->req(); i++) {
+ newcall->add_req(oldcall->in(i));
+ }
+ newcall->set_jvms(oldcall->jvms());
+ for (JVMState *jvms = newcall->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ jvms->set_map(newcall);
+ jvms->set_locoff(jvms->locoff()+jvms_adj);
+ jvms->set_stkoff(jvms->stkoff()+jvms_adj);
+ jvms->set_monoff(jvms->monoff()+jvms_adj);
+ jvms->set_endoff(jvms->endoff()+jvms_adj);
+ }
+}
+
+Node* PhaseMacroExpand::opt_iff(Node* region, Node* iff) {
+ IfNode *opt_iff = transform_later(iff)->as_If();
+
+ // Fast path taken; set region slot 2
+ Node *fast_taken = transform_later( new (C, 1) IfFalseNode(opt_iff) );
+ region->init_req(2,fast_taken); // Capture fast-control
+
+ // Fast path not-taken, i.e. slow path
+ Node *slow_taken = transform_later( new (C, 1) IfTrueNode(opt_iff) );
+ return slow_taken;
+}
+
+//--------------------copy_predefined_input_for_runtime_call--------------------
+void PhaseMacroExpand::copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call) {
+ // Set fixed predefined input arguments
+ call->init_req( TypeFunc::Control, ctrl );
+ call->init_req( TypeFunc::I_O , oldcall->in( TypeFunc::I_O) );
+ call->init_req( TypeFunc::Memory , oldcall->in( TypeFunc::Memory ) ); // ?????
+ call->init_req( TypeFunc::ReturnAdr, oldcall->in( TypeFunc::ReturnAdr ) );
+ call->init_req( TypeFunc::FramePtr, oldcall->in( TypeFunc::FramePtr ) );
+}
+
+//------------------------------make_slow_call---------------------------------
+CallNode* PhaseMacroExpand::make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call, const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1) {
+
+ // Slow-path call
+ int size = slow_call_type->domain()->cnt();
+ CallNode *call = leaf_name
+ ? (CallNode*)new (C, size) CallLeafNode ( slow_call_type, slow_call, leaf_name, TypeRawPtr::BOTTOM )
+ : (CallNode*)new (C, size) CallStaticJavaNode( slow_call_type, slow_call, OptoRuntime::stub_name(slow_call), oldcall->jvms()->bci(), TypeRawPtr::BOTTOM );
+
+ // Slow path call has no side-effects, uses few values
+ copy_predefined_input_for_runtime_call(slow_path, oldcall, call );
+ if (parm0 != NULL) call->init_req(TypeFunc::Parms+0, parm0);
+ if (parm1 != NULL) call->init_req(TypeFunc::Parms+1, parm1);
+ copy_call_debug_info(oldcall, call);
+ call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
+ _igvn.hash_delete(oldcall);
+ _igvn.subsume_node(oldcall, call);
+ transform_later(call);
+
+ return call;
+}
+
+void PhaseMacroExpand::extract_call_projections(CallNode *call) {
+ _fallthroughproj = NULL;
+ _fallthroughcatchproj = NULL;
+ _ioproj_fallthrough = NULL;
+ _ioproj_catchall = NULL;
+ _catchallcatchproj = NULL;
+ _memproj_fallthrough = NULL;
+ _memproj_catchall = NULL;
+ _resproj = NULL;
+ for (DUIterator_Fast imax, i = call->fast_outs(imax); i < imax; i++) {
+ ProjNode *pn = call->fast_out(i)->as_Proj();
+ switch (pn->_con) {
+ case TypeFunc::Control:
+ {
+ // For Control (fallthrough) and I_O (catch_all_index) we have CatchProj -> Catch -> Proj
+ _fallthroughproj = pn;
+ DUIterator_Fast jmax, j = pn->fast_outs(jmax);
+ const Node *cn = pn->fast_out(j);
+ if (cn->is_Catch()) {
+ ProjNode *cpn = NULL;
+ for (DUIterator_Fast kmax, k = cn->fast_outs(kmax); k < kmax; k++) {
+ cpn = cn->fast_out(k)->as_Proj();
+ assert(cpn->is_CatchProj(), "must be a CatchProjNode");
+ if (cpn->_con == CatchProjNode::fall_through_index)
+ _fallthroughcatchproj = cpn;
+ else {
+ assert(cpn->_con == CatchProjNode::catch_all_index, "must be correct index.");
+ _catchallcatchproj = cpn;
+ }
+ }
+ }
+ break;
+ }
+ case TypeFunc::I_O:
+ if (pn->_is_io_use)
+ _ioproj_catchall = pn;
+ else
+ _ioproj_fallthrough = pn;
+ break;
+ case TypeFunc::Memory:
+ if (pn->_is_io_use)
+ _memproj_catchall = pn;
+ else
+ _memproj_fallthrough = pn;
+ break;
+ case TypeFunc::Parms:
+ _resproj = pn;
+ break;
+ default:
+ assert(false, "unexpected projection from allocation node.");
+ }
+ }
+
+}
+
+
+//---------------------------set_eden_pointers-------------------------
+void PhaseMacroExpand::set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr) {
+ if (UseTLAB) { // Private allocation: load from TLS
+ Node* thread = transform_later(new (C, 1) ThreadLocalNode());
+ int tlab_top_offset = in_bytes(JavaThread::tlab_top_offset());
+ int tlab_end_offset = in_bytes(JavaThread::tlab_end_offset());
+ eden_top_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_top_offset);
+ eden_end_adr = basic_plus_adr(top()/*not oop*/, thread, tlab_end_offset);
+ } else { // Shared allocation: load from globals
+ CollectedHeap* ch = Universe::heap();
+ address top_adr = (address)ch->top_addr();
+ address end_adr = (address)ch->end_addr();
+ eden_top_adr = makecon(TypeRawPtr::make(top_adr));
+ eden_end_adr = basic_plus_adr(eden_top_adr, end_adr - top_adr);
+ }
+}
+
+
+Node* PhaseMacroExpand::make_load(Node* ctl, Node* mem, Node* base, int offset, const Type* value_type, BasicType bt) {
+ Node* adr = basic_plus_adr(base, offset);
+ const TypePtr* adr_type = TypeRawPtr::BOTTOM;
+ Node* value = LoadNode::make(C, ctl, mem, adr, adr_type, value_type, bt);
+ transform_later(value);
+ return value;
+}
+
+
+Node* PhaseMacroExpand::make_store(Node* ctl, Node* mem, Node* base, int offset, Node* value, BasicType bt) {
+ Node* adr = basic_plus_adr(base, offset);
+ mem = StoreNode::make(C, ctl, mem, adr, NULL, value, bt);
+ transform_later(mem);
+ return mem;
+}
+
+//=============================================================================
+//
+// A L L O C A T I O N
+//
+// Allocation attempts to be fast in the case of frequent small objects.
+// It breaks down like this:
+//
+// 1) Size in doublewords is computed. This is a constant for objects and
+// variable for most arrays. Doubleword units are used to avoid size
+// overflow of huge doubleword arrays. We need doublewords in the end for
+// rounding.
+//
+// 2) Size is checked for being 'too large'. Too-large allocations will go
+// the slow path into the VM. The slow path can throw any required
+// exceptions, and does all the special checks for very large arrays. The
+// size test can constant-fold away for objects. For objects with
+// finalizers it constant-folds the other way: you always go slow with
+// finalizers.
+//
+// 3) If NOT using TLABs, this is the contended loop-back point.
+// Load-Locked the heap top. If using TLABs normal-load the heap top.
+//
+// 4) Check that heap top + size*8 < max. If we fail, go the slow route.
+// NOTE: "top+size*8" cannot wrap the 4Gig line! Here's why: for largish
+// "size*8" we always enter the VM, where "largish" is a constant picked small
+// enough that there's always space between the eden max and 4Gig (old space is
+// there so it's quite large) and large enough that the cost of entering the VM
+// is dwarfed by the cost to initialize the space.
+//
+// 5) If NOT using TLABs, Store-Conditional the adjusted heap top back
+// down. If contended, repeat at step 3. If using TLABs normal-store
+// adjusted heap top back down; there is no contention.
+//
+// 6) If !ZeroTLAB then Bulk-clear the object/array. Fill in klass & mark
+// fields.
+//
+// 7) Merge with the slow-path; cast the raw memory pointer to the correct
+// oop flavor.
+//
+//=============================================================================
+// FastAllocateSizeLimit value is in DOUBLEWORDS.
+// Allocations bigger than this always go the slow route.
+// This value must be small enough that allocation attempts that need to
+// trigger exceptions go the slow route. Also, it must be small enough so
+// that heap_top + size_in_bytes does not wrap around the 4Gig limit.
+//=============================================================================
+// %%% Here is an old comment from parseHelper.cpp; is it outdated?
+// The allocator will coalesce int->oop copies away. See comment in
+// coalesce.cpp about how this works. It depends critically on the exact
+// code shape produced here, so if you are changing this code shape
+// make sure the GC info for the heap-top is correct in and around the
+// slow-path call.
+//
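+
+// ---------------------------------------------------------------------------
+// Editor's note: the compile-guarded sketch below is NOT part of the original
+// sources. It is a minimal, self-contained illustration of the fast-path
+// bump-pointer allocation described in steps 3-5 above, with a CAS loop
+// standing in for the Load-Locked / Store-Conditional pair on a hypothetical
+// shared eden (the TLAB case needs no atomics at all). The guard macro is
+// never defined, so none of this is compiled.
+#ifdef EDITOR_SKETCH_FAST_ALLOC
+#include <atomic>
+#include <cstddef>
+
+// Hypothetical eden bounds; the real code loads them via set_eden_pointers().
+static std::atomic<char*> eden_top;
+static char*              eden_end;
+
+// Try to carve size_in_bytes out of eden; NULL means "take the slow path".
+static char* fast_allocate(size_t size_in_bytes) {
+  char* old_top = eden_top.load(std::memory_order_relaxed);
+  for (;;) {
+    char* new_top = old_top + size_in_bytes;
+    if (new_top >= eden_end) {
+      return NULL;                  // would need a GC: slow path (step 4)
+    }
+    // Store-conditional equivalent: on contention, old_top is refreshed and
+    // we retry from step 3.
+    if (eden_top.compare_exchange_weak(old_top, new_top)) {
+      return old_top;               // raw memory; caller fills mark/klass (step 6)
+    }
+  }
+}
+#endif // EDITOR_SKETCH_FAST_ALLOC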
+
+void PhaseMacroExpand::expand_allocate_common(
+ AllocateNode* alloc, // allocation node to be expanded
+ Node* length, // array length for an array allocation
+ const TypeFunc* slow_call_type, // Type of slow call
+ address slow_call_address // Address of slow call
+ )
+{
+
+ Node* ctrl = alloc->in(TypeFunc::Control);
+ Node* mem = alloc->in(TypeFunc::Memory);
+ Node* i_o = alloc->in(TypeFunc::I_O);
+ Node* size_in_bytes = alloc->in(AllocateNode::AllocSize);
+ Node* klass_node = alloc->in(AllocateNode::KlassNode);
+ Node* initial_slow_test = alloc->in(AllocateNode::InitialTest);
+
+ Node* eden_top_adr;
+ Node* eden_end_adr;
+ set_eden_pointers(eden_top_adr, eden_end_adr);
+
+ uint raw_idx = C->get_alias_index(TypeRawPtr::BOTTOM);
+ assert(ctrl != NULL, "must have control");
+
+ // Load Eden::end. Loop invariant and hoisted.
+ //
+ // Note: We set the control input on "eden_end" and "old_eden_top" when using
+ // a TLAB to work around a bug where these values were being moved across
+ // a safepoint. These are not oops, so they cannot be included in the oop
+ // map, but they can be changed by a GC. The proper way to fix this would
+ // be to set the raw memory state when generating a SafepointNode. However
+ // this will require extensive changes to the loop optimization in order to
+ // prevent a degradation of the optimization.
+ // See comment in memnode.hpp, around line 227 in class LoadPNode.
+ Node* eden_end = make_load(ctrl, mem, eden_end_adr, 0, TypeRawPtr::BOTTOM, T_ADDRESS);
+
+ // We need a Region and corresponding Phi's to merge the slow-path and fast-path results.
+ // they will not be used if "always_slow" is set
+ enum { slow_result_path = 1, fast_result_path = 2 };
+ Node *result_region;
+ Node *result_phi_rawmem;
+ Node *result_phi_rawoop;
+ Node *result_phi_i_o;
+
+ // The initial slow comparison is a size check, the comparison
+ // we want to do is a BoolTest::gt
+ bool always_slow = false;
+ int tv = _igvn.find_int_con(initial_slow_test, -1);
+ if (tv >= 0) {
+ always_slow = (tv == 1);
+ initial_slow_test = NULL;
+ } else {
+ initial_slow_test = BoolNode::make_predicate(initial_slow_test, &_igvn);
+ }
+
+ if (DTraceAllocProbes) {
+ // Force slow-path allocation
+ always_slow = true;
+ initial_slow_test = NULL;
+ }
+
+ enum { too_big_or_final_path = 1, need_gc_path = 2 };
+ Node *slow_region = NULL;
+ Node *toobig_false = ctrl;
+
+ assert (initial_slow_test == NULL || !always_slow, "arguments must be consistent");
+ // generate the initial test if necessary
+ if (initial_slow_test != NULL ) {
+ slow_region = new (C, 3) RegionNode(3);
+
+ // Now make the initial failure test. Usually a too-big test but
+ // might be a TRUE for finalizers or a fancy class check for
+ // newInstance0.
+ IfNode *toobig_iff = new (C, 2) IfNode(ctrl, initial_slow_test, PROB_MIN, COUNT_UNKNOWN);
+ transform_later(toobig_iff);
+ // Plug the failing-too-big test into the slow-path region
+ Node *toobig_true = new (C, 1) IfTrueNode( toobig_iff );
+ transform_later(toobig_true);
+ slow_region ->init_req( too_big_or_final_path, toobig_true );
+ toobig_false = new (C, 1) IfFalseNode( toobig_iff );
+ transform_later(toobig_false);
+ } else { // No initial test, just fall into next case
+ toobig_false = ctrl;
+ debug_only(slow_region = NodeSentinel);
+ }
+
+ Node *slow_mem = mem; // save the current memory state for slow path
+ // generate the fast allocation code unless we know that the initial test will always go slow
+ if (!always_slow) {
+ // allocate the Region and Phi nodes for the result
+ result_region = new (C, 3) RegionNode(3);
+ result_phi_rawmem = new (C, 3) PhiNode( result_region, Type::MEMORY, TypeRawPtr::BOTTOM );
+ result_phi_rawoop = new (C, 3) PhiNode( result_region, TypeRawPtr::BOTTOM );
+ result_phi_i_o = new (C, 3) PhiNode( result_region, Type::ABIO ); // I/O is used for Prefetch
+
+ // We need a Region for the loop-back contended case.
+ enum { fall_in_path = 1, contended_loopback_path = 2 };
+ Node *contended_region;
+ Node *contended_phi_rawmem;
+ if( UseTLAB ) {
+ contended_region = toobig_false;
+ contended_phi_rawmem = mem;
+ } else {
+ contended_region = new (C, 3) RegionNode(3);
+ contended_phi_rawmem = new (C, 3) PhiNode( contended_region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ // Now handle the passing-too-big test. We fall into the contended
+ // loop-back merge point.
+ contended_region ->init_req( fall_in_path, toobig_false );
+ contended_phi_rawmem->init_req( fall_in_path, mem );
+ transform_later(contended_region);
+ transform_later(contended_phi_rawmem);
+ }
+
+ // Load(-locked) the heap top.
+ // See note above concerning the control input when using a TLAB
+ Node *old_eden_top = UseTLAB
+ ? new (C, 3) LoadPNode ( ctrl, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM )
+ : new (C, 3) LoadPLockedNode( contended_region, contended_phi_rawmem, eden_top_adr );
+
+ transform_later(old_eden_top);
+ // Add to heap top to get a new heap top
+ Node *new_eden_top = new (C, 4) AddPNode( top(), old_eden_top, size_in_bytes );
+ transform_later(new_eden_top);
+ // Check for needing a GC; compare against heap end
+ Node *needgc_cmp = new (C, 3) CmpPNode( new_eden_top, eden_end );
+ transform_later(needgc_cmp);
+ Node *needgc_bol = new (C, 2) BoolNode( needgc_cmp, BoolTest::ge );
+ transform_later(needgc_bol);
+ IfNode *needgc_iff = new (C, 2) IfNode(contended_region, needgc_bol, PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
+ transform_later(needgc_iff);
+
+ // Plug the failing-heap-space-need-gc test into the slow-path region
+ Node *needgc_true = new (C, 1) IfTrueNode( needgc_iff );
+ transform_later(needgc_true);
+ if( initial_slow_test ) {
+ slow_region ->init_req( need_gc_path, needgc_true );
+ // This completes all paths into the slow merge point
+ transform_later(slow_region);
+ } else { // No initial slow path needed!
+ // Just fall from the need-GC path straight into the VM call.
+ slow_region = needgc_true;
+ }
+ // No need for a GC. Setup for the Store-Conditional
+ Node *needgc_false = new (C, 1) IfFalseNode( needgc_iff );
+ transform_later(needgc_false);
+
+ // Grab regular I/O before optional prefetch may change it.
+ // Slow-path does no I/O so just set it to the original I/O.
+ result_phi_i_o->init_req( slow_result_path, i_o );
+
+ i_o = prefetch_allocation(i_o, needgc_false, contended_phi_rawmem,
+ old_eden_top, new_eden_top, length);
+
+ // Store (-conditional) the modified eden top back down.
+ // StorePConditional produces flags for a test PLUS a modified raw
+ // memory state.
+ Node *store_eden_top;
+ Node *fast_oop_ctrl;
+ if( UseTLAB ) {
+ store_eden_top = new (C, 4) StorePNode( needgc_false, contended_phi_rawmem, eden_top_adr, TypeRawPtr::BOTTOM, new_eden_top );
+ transform_later(store_eden_top);
+ fast_oop_ctrl = needgc_false; // No contention, so this is the fast path
+ } else {
+ store_eden_top = new (C, 5) StorePConditionalNode( needgc_false, contended_phi_rawmem, eden_top_adr, new_eden_top, old_eden_top );
+ transform_later(store_eden_top);
+ Node *contention_check = new (C, 2) BoolNode( store_eden_top, BoolTest::ne );
+ transform_later(contention_check);
+ store_eden_top = new (C, 1) SCMemProjNode(store_eden_top);
+ transform_later(store_eden_top);
+
+ // If not using TLABs, check to see if there was contention.
+ IfNode *contention_iff = new (C, 2) IfNode ( needgc_false, contention_check, PROB_MIN, COUNT_UNKNOWN );
+ transform_later(contention_iff);
+ Node *contention_true = new (C, 1) IfTrueNode( contention_iff );
+ transform_later(contention_true);
+ // If contention, loopback and try again.
+ contended_region->init_req( contended_loopback_path, contention_true );
+ contended_phi_rawmem->init_req( contended_loopback_path, store_eden_top );
+
+ // Fast-path succeeded with no contention!
+ Node *contention_false = new (C, 1) IfFalseNode( contention_iff );
+ transform_later(contention_false);
+ fast_oop_ctrl = contention_false;
+ }
+
+ // Rename successful fast-path variables to make meaning more obvious
+ Node* fast_oop = old_eden_top;
+ Node* fast_oop_rawmem = store_eden_top;
+ fast_oop_rawmem = initialize_object(alloc,
+ fast_oop_ctrl, fast_oop_rawmem, fast_oop,
+ klass_node, length, size_in_bytes);
+
+ if (ExtendedDTraceProbes) {
+ // Slow-path call
+ int size = TypeFunc::Parms + 2;
+ CallLeafNode *call = new (C, size) CallLeafNode(OptoRuntime::dtrace_object_alloc_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_object_alloc_base),
+ "dtrace_object_alloc",
+ TypeRawPtr::BOTTOM);
+
+ // Get base of thread-local storage area
+ Node* thread = new (C, 1) ThreadLocalNode();
+ transform_later(thread);
+
+ call->init_req(TypeFunc::Parms+0, thread);
+ call->init_req(TypeFunc::Parms+1, fast_oop);
+ call->init_req( TypeFunc::Control, fast_oop_ctrl );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ call->init_req( TypeFunc::Memory , fast_oop_rawmem );
+ call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) );
+ call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) );
+ transform_later(call);
+ fast_oop_ctrl = new (C, 1) ProjNode(call,TypeFunc::Control);
+ transform_later(fast_oop_ctrl);
+ fast_oop_rawmem = new (C, 1) ProjNode(call,TypeFunc::Memory);
+ transform_later(fast_oop_rawmem);
+ }
+
+ // Plug in the successful fast-path into the result merge point
+ result_region ->init_req( fast_result_path, fast_oop_ctrl );
+ result_phi_rawoop->init_req( fast_result_path, fast_oop );
+ result_phi_i_o ->init_req( fast_result_path, i_o );
+ result_phi_rawmem->init_req( fast_result_path, fast_oop_rawmem );
+ } else {
+ slow_region = ctrl;
+ }
+
+ // Generate slow-path call
+ CallNode *call = new (C, slow_call_type->domain()->cnt())
+ CallStaticJavaNode(slow_call_type, slow_call_address,
+ OptoRuntime::stub_name(slow_call_address),
+ alloc->jvms()->bci(),
+ TypePtr::BOTTOM);
+ call->init_req( TypeFunc::Control, slow_region );
+ call->init_req( TypeFunc::I_O , top() ) ; // does no i/o
+ call->init_req( TypeFunc::Memory , slow_mem ); // may gc ptrs
+ call->init_req( TypeFunc::ReturnAdr, alloc->in(TypeFunc::ReturnAdr) );
+ call->init_req( TypeFunc::FramePtr, alloc->in(TypeFunc::FramePtr) );
+
+ call->init_req(TypeFunc::Parms+0, klass_node);
+ if (length != NULL) {
+ call->init_req(TypeFunc::Parms+1, length);
+ }
+
+ // Copy debug information and adjust JVMState information, then replace
+ // allocate node with the call
+ copy_call_debug_info((CallNode *) alloc, call);
+ if (!always_slow) {
+ call->set_cnt(PROB_UNLIKELY_MAG(4)); // Same effect as RC_UNCOMMON.
+ }
+ _igvn.hash_delete(alloc);
+ _igvn.subsume_node(alloc, call);
+ transform_later(call);
+
+ // Identify the output projections from the allocate node and
+ // adjust any references to them.
+ // The control and io projections look like:
+ //
+ // v---Proj(ctrl) <-----+ v---CatchProj(ctrl)
+ // Allocate Catch
+ // ^---Proj(io) <-------+ ^---CatchProj(io)
+ //
+ // We are interested in the CatchProj nodes.
+ //
+ extract_call_projections(call);
+
+ // An allocate node has separate memory projections for the uses on the control and i_o paths
+ // Replace uses of the control memory projection with result_phi_rawmem (unless we are only generating a slow call)
+ if (!always_slow && _memproj_fallthrough != NULL) {
+ for (DUIterator_Fast imax, i = _memproj_fallthrough->fast_outs(imax); i < imax; i++) {
+ Node *use = _memproj_fallthrough->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _memproj_fallthrough, result_phi_rawmem);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+ // Now change uses of _memproj_catchall to use _memproj_fallthrough and delete _memproj_catchall so
+ // we end up with a call that has only 1 memory projection
+ if (_memproj_catchall != NULL ) {
+ if (_memproj_fallthrough == NULL) {
+ _memproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::Memory);
+ transform_later(_memproj_fallthrough);
+ }
+ for (DUIterator_Fast imax, i = _memproj_catchall->fast_outs(imax); i < imax; i++) {
+ Node *use = _memproj_catchall->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _memproj_catchall, _memproj_fallthrough);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+
+ mem = result_phi_rawmem;
+
+ // An allocate node has separate i_o projections for the uses on the control and i_o paths
+ // Replace uses of the control i_o projection with result_phi_i_o (unless we are only generating a slow call)
+ if (_ioproj_fallthrough == NULL) {
+ _ioproj_fallthrough = new (C, 1) ProjNode(call, TypeFunc::I_O);
+ transform_later(_ioproj_fallthrough);
+ } else if (!always_slow) {
+ for (DUIterator_Fast imax, i = _ioproj_fallthrough->fast_outs(imax); i < imax; i++) {
+ Node *use = _ioproj_fallthrough->fast_out(i);
+
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _ioproj_fallthrough, result_phi_i_o);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+ // Now change uses of _ioproj_catchall to use _ioproj_fallthrough and delete _ioproj_catchall so
+ // we end up with a call that has only 1 i_o projection
+ if (_ioproj_catchall != NULL ) {
+ for (DUIterator_Fast imax, i = _ioproj_catchall->fast_outs(imax); i < imax; i++) {
+ Node *use = _ioproj_catchall->fast_out(i);
+ _igvn.hash_delete(use);
+ imax -= replace_input(use, _ioproj_catchall, _ioproj_fallthrough);
+ _igvn._worklist.push(use);
+ // back up iterator
+ --i;
+ }
+ }
+
+ // if we generated only a slow call, we are done
+ if (always_slow)
+ return;
+
+
+ if (_fallthroughcatchproj != NULL) {
+ ctrl = _fallthroughcatchproj->clone();
+ transform_later(ctrl);
+ _igvn.hash_delete(_fallthroughcatchproj);
+ _igvn.subsume_node(_fallthroughcatchproj, result_region);
+ } else {
+ ctrl = top();
+ }
+ Node *slow_result;
+ if (_resproj == NULL) {
+ // no uses of the allocation result
+ slow_result = top();
+ } else {
+ slow_result = _resproj->clone();
+ transform_later(slow_result);
+ _igvn.hash_delete(_resproj);
+ _igvn.subsume_node(_resproj, result_phi_rawoop);
+ }
+
+ // Plug slow-path into result merge point
+ result_region ->init_req( slow_result_path, ctrl );
+ result_phi_rawoop->init_req( slow_result_path, slow_result);
+ result_phi_rawmem->init_req( slow_result_path, _memproj_fallthrough );
+ transform_later(result_region);
+ transform_later(result_phi_rawoop);
+ transform_later(result_phi_rawmem);
+ transform_later(result_phi_i_o);
+ // This completes all paths into the result merge point
+}
+
+
+// Helper for PhaseMacroExpand::expand_allocate_common.
+// Initializes the newly-allocated storage.
+Node*
+PhaseMacroExpand::initialize_object(AllocateNode* alloc,
+ Node* control, Node* rawmem, Node* object,
+ Node* klass_node, Node* length,
+ Node* size_in_bytes) {
+ InitializeNode* init = alloc->initialization();
+ // Store the klass & mark bits
+ Node* mark_node = NULL;
+ // For now only enable fast locking for non-array types
+ if (UseBiasedLocking && (length == NULL)) {
+ mark_node = make_load(NULL, rawmem, klass_node, Klass::prototype_header_offset_in_bytes() + sizeof(oopDesc), TypeRawPtr::BOTTOM, T_ADDRESS);
+ } else {
+ mark_node = makecon(TypeRawPtr::make((address)markOopDesc::prototype()));
+ }
+ rawmem = make_store(control, rawmem, object, oopDesc::mark_offset_in_bytes(), mark_node, T_ADDRESS);
+ rawmem = make_store(control, rawmem, object, oopDesc::klass_offset_in_bytes(), klass_node, T_OBJECT);
+ int header_size = alloc->minimum_header_size(); // conservatively small
+
+ // Array length
+ if (length != NULL) { // Arrays need length field
+ rawmem = make_store(control, rawmem, object, arrayOopDesc::length_offset_in_bytes(), length, T_INT);
+ // conservatively small header size:
+ header_size = sizeof(arrayOopDesc);
+ ciKlass* k = _igvn.type(klass_node)->is_klassptr()->klass();
+ if (k->is_array_klass()) // we know the exact header size in most cases:
+ header_size = Klass::layout_helper_header_size(k->layout_helper());
+ }
+
+ // Clear the object body, if necessary.
+ if (init == NULL) {
+ // The init has somehow disappeared; be cautious and clear everything.
+ //
+ // This can happen if a node is allocated but an uncommon trap occurs
+ // immediately. In this case, the Initialize gets associated with the
+ // trap, and may be placed in a different (outer) loop, if the Allocate
+ // is in a loop. If the inner loop gets unrolled (this is rare), then
+ // there can be two Allocates for one Initialize. The answer in all these
+ // edge cases is safety first. It is always safe to clear immediately
+ // within an Allocate, and then (maybe or maybe not) clear some more later.
+ if (!ZeroTLAB)
+ rawmem = ClearArrayNode::clear_memory(control, rawmem, object,
+ header_size, size_in_bytes,
+ &_igvn);
+ } else {
+ if (!init->is_complete()) {
+ // Try to win by zeroing only what the init does not store.
+ // We can also try to do some peephole optimizations,
+ // such as combining some adjacent subword stores.
+ rawmem = init->complete_stores(control, rawmem, object,
+ header_size, size_in_bytes, &_igvn);
+ }
+
+ // We have no more use for this link, since the AllocateNode goes away:
+ init->set_req(InitializeNode::RawAddress, top());
+ // (If we keep the link, it just confuses the register allocator,
+ // who thinks he sees a real use of the address by the membar.)
+ }
+
+ return rawmem;
+}
+
+// Generate prefetch instructions for next allocations.
+Node* PhaseMacroExpand::prefetch_allocation(Node* i_o, Node*& needgc_false,
+ Node*& contended_phi_rawmem,
+ Node* old_eden_top, Node* new_eden_top,
+ Node* length) {
+ if( UseTLAB && AllocatePrefetchStyle == 2 ) {
+ // Generate prefetch allocation with watermark check.
+ // As an allocation hits the watermark, we will prefetch starting
+ // at a "distance" away from watermark.
+ enum { fall_in_path = 1, pf_path = 2 };
+
+ Node *pf_region = new (C, 3) RegionNode(3);
+ Node *pf_phi_rawmem = new (C, 3) PhiNode( pf_region, Type::MEMORY,
+ TypeRawPtr::BOTTOM );
+ // I/O is used for Prefetch
+ Node *pf_phi_abio = new (C, 3) PhiNode( pf_region, Type::ABIO );
+
+ Node *thread = new (C, 1) ThreadLocalNode();
+ transform_later(thread);
+
+ Node *eden_pf_adr = new (C, 4) AddPNode( top()/*not oop*/, thread,
+ _igvn.MakeConX(in_bytes(JavaThread::tlab_pf_top_offset())) );
+ transform_later(eden_pf_adr);
+
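+ // Load the current TLAB prefetch watermark (tlab_pf_top) from the thread.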
+ Node *old_pf_wm = new (C, 3) LoadPNode( needgc_false,
+ contended_phi_rawmem, eden_pf_adr,
+ TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM );
+ transform_later(old_pf_wm);
+
+ // check against new_eden_top
+ Node *need_pf_cmp = new (C, 3) CmpPNode( new_eden_top, old_pf_wm );
+ transform_later(need_pf_cmp);
+ Node *need_pf_bol = new (C, 2) BoolNode( need_pf_cmp, BoolTest::ge );
+ transform_later(need_pf_bol);
+ IfNode *need_pf_iff = new (C, 2) IfNode( needgc_false, need_pf_bol,
+ PROB_UNLIKELY_MAG(4), COUNT_UNKNOWN );
+ transform_later(need_pf_iff);
+
+ // true node, add prefetch distance
+ Node *need_pf_true = new (C, 1) IfTrueNode( need_pf_iff );
+ transform_later(need_pf_true);
+
+ Node *need_pf_false = new (C, 1) IfFalseNode( need_pf_iff );
+ transform_later(need_pf_false);
+
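+ // On the prefetch path, advance the watermark by AllocatePrefetchDistance
+ // and store it back into the thread-local slot.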
+ Node *new_pf_wmt = new (C, 4) AddPNode( top(), old_pf_wm,
+ _igvn.MakeConX(AllocatePrefetchDistance) );
+ transform_later(new_pf_wmt );
+ new_pf_wmt->set_req(0, need_pf_true);
+
+ Node *store_new_wmt = new (C, 4) StorePNode( need_pf_true,
+ contended_phi_rawmem, eden_pf_adr,
+ TypeRawPtr::BOTTOM, new_pf_wmt );
+ transform_later(store_new_wmt);
+
+ // adding prefetches
+ pf_phi_abio->init_req( fall_in_path, i_o );
+
+ Node *prefetch_adr;
+ Node *prefetch;
+ uint lines = AllocatePrefetchDistance / AllocatePrefetchStepSize;
+ uint step_size = AllocatePrefetchStepSize;
+ uint distance = 0;
+
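+ // Illustrative example (the flag defaults are platform-dependent): with
+ // AllocatePrefetchDistance == 256 and AllocatePrefetchStepSize == 64 this
+ // loop emits 4 PrefetchWrite nodes at new_pf_wmt + 0, 64, 128 and 192,
+ // chained through the i_o edge.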
+ for ( uint i = 0; i < lines; i++ ) {
+ prefetch_adr = new (C, 4) AddPNode( old_pf_wm, new_pf_wmt,
+ _igvn.MakeConX(distance) );
+ transform_later(prefetch_adr);
+ prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
+ transform_later(prefetch);
+ distance += step_size;
+ i_o = prefetch;
+ }
+ pf_phi_abio->set_req( pf_path, i_o );
+
+ pf_region->init_req( fall_in_path, need_pf_false );
+ pf_region->init_req( pf_path, need_pf_true );
+
+ pf_phi_rawmem->init_req( fall_in_path, contended_phi_rawmem );
+ pf_phi_rawmem->init_req( pf_path, store_new_wmt );
+
+ transform_later(pf_region);
+ transform_later(pf_phi_rawmem);
+ transform_later(pf_phi_abio);
+
+ needgc_false = pf_region;
+ contended_phi_rawmem = pf_phi_rawmem;
+ i_o = pf_phi_abio;
+ } else if( AllocatePrefetchStyle > 0 ) {
+ // Insert a prefetch for each allocation only on the fast-path
+ Node *prefetch_adr;
+ Node *prefetch;
+ // Generate several prefetch instructions only for arrays.
+ uint lines = (length != NULL) ? AllocatePrefetchLines : 1;
+ uint step_size = AllocatePrefetchStepSize;
+ uint distance = AllocatePrefetchDistance;
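+ // Prefetch beyond new_eden_top, starting AllocatePrefetchDistance bytes
+ // ahead and stepping by AllocatePrefetchStepSize per line.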
+ for ( uint i = 0; i < lines; i++ ) {
+ prefetch_adr = new (C, 4) AddPNode( old_eden_top, new_eden_top,
+ _igvn.MakeConX(distance) );
+ transform_later(prefetch_adr);
+ prefetch = new (C, 3) PrefetchWriteNode( i_o, prefetch_adr );
+ // Do not let it float too high, since if eden_top == eden_end,
+ // both might be null.
+ if( i == 0 ) { // Set control for first prefetch, next follows it
+ prefetch->init_req(0, needgc_false);
+ }
+ transform_later(prefetch);
+ distance += step_size;
+ i_o = prefetch;
+ }
+ }
+ return i_o;
+}
+
+
+void PhaseMacroExpand::expand_allocate(AllocateNode *alloc) {
+ expand_allocate_common(alloc, NULL,
+ OptoRuntime::new_instance_Type(),
+ OptoRuntime::new_instance_Java());
+}
+
+void PhaseMacroExpand::expand_allocate_array(AllocateArrayNode *alloc) {
+ Node* length = alloc->in(AllocateNode::ALength);
+ expand_allocate_common(alloc, length,
+ OptoRuntime::new_array_Type(),
+ OptoRuntime::new_array_Java());
+}
+
+
+// Once we have determined that this lock/unlock can be eliminated, we simply
+// eliminate the node without expanding it.
+//
+// Note: The membars associated with the lock/unlock are currently not
+// eliminated. This should be investigated as a future enhancement.
+//
+void PhaseMacroExpand::eliminate_locking_node(AbstractLockNode *alock) {
+ Node* mem = alock->in(TypeFunc::Memory);
+
+ // The memory projection from a lock/unlock is RawMem
+ // The input to a Lock is merged memory, so extract its RawMem input
+ // (unless the MergeMem has been optimized away.)
+ if (alock->is_Lock()) {
+ if (mem->is_MergeMem())
+ mem = mem->as_MergeMem()->in(Compile::AliasIdxRaw);
+ }
+
+ extract_call_projections(alock);
+ // There are 2 projections from the lock. The lock node will
+ // be deleted when its last use is subsumed below.
+ assert(alock->outcnt() == 2 && _fallthroughproj != NULL &&
+ _memproj_fallthrough != NULL, "Unexpected projections from Lock/Unlock");
+ _igvn.hash_delete(_fallthroughproj);
+ _igvn.subsume_node(_fallthroughproj, alock->in(TypeFunc::Control));
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem);
+ return;
+}
+
+
+//------------------------------expand_lock_node----------------------
+void PhaseMacroExpand::expand_lock_node(LockNode *lock) {
+
+ Node* ctrl = lock->in(TypeFunc::Control);
+ Node* mem = lock->in(TypeFunc::Memory);
+ Node* obj = lock->obj_node();
+ Node* box = lock->box_node();
+ Node *flock = lock->fastlock_node();
+
+ if (lock->is_eliminated()) {
+ eliminate_locking_node(lock);
+ return;
+ }
+
+ // Make the merge point
+ Node *region = new (C, 3) RegionNode(3);
+
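+ // Test the FastLock result; opt_iff below wires the fast path into region
+ // slot 2 and returns the unlikely slow-path control, which feeds the runtime call.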
+ Node *bol = transform_later(new (C, 2) BoolNode(flock,BoolTest::ne));
+ Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+ // Optimize test; set region slot 2
+ Node *slow_path = opt_iff(region,iff);
+
+ // Make slow path call
+ CallNode *call = make_slow_call( (CallNode *) lock, OptoRuntime::complete_monitor_enter_Type(), OptoRuntime::complete_monitor_locking_Java(), NULL, slow_path, obj, box );
+
+ extract_call_projections(call);
+
+ // Slow path can only throw asynchronous exceptions, which are always
+ // de-opted. So the compiler thinks the slow-call can never throw an
+ // exception. If it DOES throw an exception we would need the debug
+ // info removed first (since if it throws there is no monitor).
+ assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
+ _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Lock");
+
+ // Capture slow path
+ // disconnect fall-through projection from call and create a new one
+ // hook up users of fall-through projection to region
+ Node *slow_ctrl = _fallthroughproj->clone();
+ transform_later(slow_ctrl);
+ _igvn.hash_delete(_fallthroughproj);
+ _fallthroughproj->disconnect_inputs(NULL);
+ region->init_req(1, slow_ctrl);
+ // region inputs are now complete
+ transform_later(region);
+ _igvn.subsume_node(_fallthroughproj, region);
+
+ // create a Phi for the memory state
+ Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ Node *memproj = transform_later( new (C, 1) ProjNode(call, TypeFunc::Memory) );
+ mem_phi->init_req(1, memproj );
+ mem_phi->init_req(2, mem);
+ transform_later(mem_phi);
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem_phi);
+
+
+}
+
+//------------------------------expand_unlock_node----------------------
+void PhaseMacroExpand::expand_unlock_node(UnlockNode *unlock) {
+
+ Node *ctrl = unlock->in(TypeFunc::Control);
+ Node* mem = unlock->in(TypeFunc::Memory);
+ Node* obj = unlock->obj_node();
+ Node* box = unlock->box_node();
+
+
+ if (unlock->is_eliminated()) {
+ eliminate_locking_node(unlock);
+ return;
+ }
+
+ // No need for a null check on unlock
+
+ // Make the merge point
+ RegionNode *region = new (C, 3) RegionNode(3);
+
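+ // Emit the inline fast-unlock test; as with locking, the unlikely outcome
+ // takes the runtime slow path below.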
+ FastUnlockNode *funlock = new (C, 3) FastUnlockNode( ctrl, obj, box );
+ funlock = transform_later( funlock )->as_FastUnlock();
+ Node *bol = transform_later(new (C, 2) BoolNode(funlock,BoolTest::ne));
+ Node *iff = new (C, 2) IfNode( ctrl, bol, PROB_MIN, COUNT_UNKNOWN );
+ // Optimize test; set region slot 2
+ Node *slow_path = opt_iff(region,iff);
+
+ CallNode *call = make_slow_call( (CallNode *) unlock, OptoRuntime::complete_monitor_exit_Type(), CAST_FROM_FN_PTR(address, SharedRuntime::complete_monitor_unlocking_C), "complete_monitor_unlocking_C", slow_path, obj, box );
+
+ extract_call_projections(call);
+
+ assert ( _ioproj_fallthrough == NULL && _ioproj_catchall == NULL &&
+ _memproj_catchall == NULL && _catchallcatchproj == NULL, "Unexpected projection from Unlock");
+
+ // No exceptions for unlocking
+ // Capture slow path
+ // disconnect fall-through projection from call and create a new one
+ // hook up users of fall-through projection to region
+ Node *slow_ctrl = _fallthroughproj->clone();
+ transform_later(slow_ctrl);
+ _igvn.hash_delete(_fallthroughproj);
+ _fallthroughproj->disconnect_inputs(NULL);
+ region->init_req(1, slow_ctrl);
+ // region inputs are now complete
+ transform_later(region);
+ _igvn.subsume_node(_fallthroughproj, region);
+
+ // create a Phi for the memory state
+ Node *mem_phi = new (C, 3) PhiNode( region, Type::MEMORY, TypeRawPtr::BOTTOM);
+ Node *memproj = transform_later( new(C, 1) ProjNode(call, TypeFunc::Memory) );
+ mem_phi->init_req(1, memproj );
+ mem_phi->init_req(2, mem);
+ transform_later(mem_phi);
+ _igvn.hash_delete(_memproj_fallthrough);
+ _igvn.subsume_node(_memproj_fallthrough, mem_phi);
+
+
+}
+
+//------------------------------expand_macro_nodes----------------------
+// Returns true if a failure occurred.
+bool PhaseMacroExpand::expand_macro_nodes() {
+ if (C->macro_count() == 0)
+ return false;
+ // Make sure expansion will not cause the node limit to be exceeded. The worst case is that a
+ // macro node gets expanded into about 50 nodes; allow 50% more for optimization.
+ if (C->check_node_count(C->macro_count() * 75, "out of nodes before macro expansion" ) )
+ return true;
+ // expand "macro" nodes
+ // nodes are removed from the macro list as they are processed
+ while (C->macro_count() > 0) {
+ Node * n = C->macro_node(0);
+ assert(n->is_macro(), "only macro nodes expected here");
+ if (_igvn.type(n) == Type::TOP || n->in(0)->is_top() ) {
+ // node is unreachable, so don't try to expand it
+ C->remove_macro_node(n);
+ continue;
+ }
+ switch (n->class_id()) {
+ case Node::Class_Allocate:
+ expand_allocate(n->as_Allocate());
+ break;
+ case Node::Class_AllocateArray:
+ expand_allocate_array(n->as_AllocateArray());
+ break;
+ case Node::Class_Lock:
+ expand_lock_node(n->as_Lock());
+ break;
+ case Node::Class_Unlock:
+ expand_unlock_node(n->as_Unlock());
+ break;
+ default:
+ assert(false, "unknown node type in macro list");
+ }
+ if (C->failing()) return true;
+ }
+ _igvn.optimize();
+ return false;
+}
diff --git a/src/share/vm/opto/macro.hpp b/src/share/vm/opto/macro.hpp
new file mode 100644
index 000000000..20dd65c40
--- /dev/null
+++ b/src/share/vm/opto/macro.hpp
@@ -0,0 +1,107 @@
+/*
+ * Copyright 2005-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class AllocateNode;
+class AllocateArrayNode;
+class CallNode;
+class Node;
+class PhaseIterGVN;
+
+class PhaseMacroExpand : public Phase {
+private:
+ PhaseIterGVN &_igvn;
+
+ // Helper methods roughly modelled after GraphKit:
+ Node* top() const { return C->top(); }
+ Node* intcon(jint con) const { return _igvn.intcon(con); }
+ Node* longcon(jlong con) const { return _igvn.longcon(con); }
+ Node* makecon(const Type *t) const { return _igvn.makecon(t); }
+ Node* basic_plus_adr(Node* base, int offset) {
+ return (offset == 0)? base: basic_plus_adr(base, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, int offset) {
+ return (offset == 0)? ptr: basic_plus_adr(base, ptr, MakeConX(offset));
+ }
+ Node* basic_plus_adr(Node* base, Node* offset) {
+ return basic_plus_adr(base, base, offset);
+ }
+ Node* basic_plus_adr(Node* base, Node* ptr, Node* offset) {
+ Node* adr = new (C, 4) AddPNode(base, ptr, offset);
+ return transform_later(adr);
+ }
+ Node* transform_later(Node* n) {
+ // equivalent to _gvn.transform in GraphKit, Ideal, etc.
+ _igvn.register_new_node_with_optimizer(n);
+ return n;
+ }
+ void set_eden_pointers(Node* &eden_top_adr, Node* &eden_end_adr);
+ Node* make_load( Node* ctl, Node* mem, Node* base, int offset,
+ const Type* value_type, BasicType bt);
+ Node* make_store(Node* ctl, Node* mem, Node* base, int offset,
+ Node* value, BasicType bt);
+
+ // projections extracted from a call node
+ ProjNode *_fallthroughproj;
+ ProjNode *_fallthroughcatchproj;
+ ProjNode *_ioproj_fallthrough;
+ ProjNode *_ioproj_catchall;
+ ProjNode *_catchallcatchproj;
+ ProjNode *_memproj_fallthrough;
+ ProjNode *_memproj_catchall;
+ ProjNode *_resproj;
+
+
+ void expand_allocate(AllocateNode *alloc);
+ void expand_allocate_array(AllocateArrayNode *alloc);
+ void expand_allocate_common(AllocateNode* alloc,
+ Node* length,
+ const TypeFunc* slow_call_type,
+ address slow_call_address);
+ void eliminate_locking_node(AbstractLockNode *alock);
+ void expand_lock_node(LockNode *lock);
+ void expand_unlock_node(UnlockNode *unlock);
+
+ int replace_input(Node *use, Node *oldref, Node *newref);
+ void copy_call_debug_info(CallNode *oldcall, CallNode * newcall);
+ Node* opt_iff(Node* region, Node* iff);
+ void copy_predefined_input_for_runtime_call(Node * ctrl, CallNode* oldcall, CallNode* call);
+ CallNode* make_slow_call(CallNode *oldcall, const TypeFunc* slow_call_type, address slow_call,
+ const char* leaf_name, Node* slow_path, Node* parm0, Node* parm1);
+ void extract_call_projections(CallNode *call);
+
+ Node* initialize_object(AllocateNode* alloc,
+ Node* control, Node* rawmem, Node* object,
+ Node* klass_node, Node* length,
+ Node* size_in_bytes);
+
+ Node* prefetch_allocation(Node* i_o,
+ Node*& needgc_false, Node*& contended_phi_rawmem,
+ Node* old_eden_top, Node* new_eden_top,
+ Node* length);
+
+public:
+ PhaseMacroExpand(PhaseIterGVN &igvn) : Phase(Macro_Expand), _igvn(igvn) {}
+ bool expand_macro_nodes();
+
+};
diff --git a/src/share/vm/opto/matcher.cpp b/src/share/vm/opto/matcher.cpp
new file mode 100644
index 000000000..a8d673a7f
--- /dev/null
+++ b/src/share/vm/opto/matcher.cpp
@@ -0,0 +1,2123 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_matcher.cpp.incl"
+
+OptoReg::Name OptoReg::c_frame_pointer;
+
+
+
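+// base2reg is indexed by a Type's base() and yields the ideal register Opcode
+// (Op_RegI, Op_RegL, Op_RegP, ...) used to hold a value of that type; bases
+// that never live in a machine register map to 0 or Node::NotAMachineReg.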
+const int Matcher::base2reg[Type::lastype] = {
+ Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0,
+ Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
+ Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
+ 0, 0/*abio*/,
+ Op_RegP /* Return address */, 0, /* the memories */
+ Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
+ 0 /*bottom*/
+};
+
+const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
+RegMask Matcher::mreg2regmask[_last_Mach_Reg];
+RegMask Matcher::STACK_ONLY_mask;
+RegMask Matcher::c_frame_ptr_mask;
+const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
+const uint Matcher::_end_rematerialize = _END_REMATERIALIZE;
+
+//---------------------------Matcher-------------------------------------------
+Matcher::Matcher( Node_List &proj_list ) :
+ PhaseTransform( Phase::Ins_Select ),
+#ifdef ASSERT
+ _old2new_map(C->comp_arena()),
+#endif
+ _shared_constants(C->comp_arena()),
+ _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
+ _swallowed(swallowed),
+ _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
+ _end_inst_chain_rule(_END_INST_CHAIN_RULE),
+ _must_clone(must_clone), _proj_list(proj_list),
+ _register_save_policy(register_save_policy),
+ _c_reg_save_policy(c_reg_save_policy),
+ _register_save_type(register_save_type),
+ _ruleName(ruleName),
+ _allocation_started(false),
+ _states_arena(Chunk::medium_size),
+ _visited(&_states_arena),
+ _shared(&_states_arena),
+ _dontcare(&_states_arena) {
+ C->set_matcher(this);
+
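+ // The spill and debug masks are not built here; they are filled in later
+ // by init_first_stack_mask().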
+ idealreg2spillmask[Op_RegI] = NULL;
+ idealreg2spillmask[Op_RegL] = NULL;
+ idealreg2spillmask[Op_RegF] = NULL;
+ idealreg2spillmask[Op_RegD] = NULL;
+ idealreg2spillmask[Op_RegP] = NULL;
+
+ idealreg2debugmask[Op_RegI] = NULL;
+ idealreg2debugmask[Op_RegL] = NULL;
+ idealreg2debugmask[Op_RegF] = NULL;
+ idealreg2debugmask[Op_RegD] = NULL;
+ idealreg2debugmask[Op_RegP] = NULL;
+}
+
+//------------------------------warp_incoming_stk_arg------------------------
+// This warps a VMReg into an OptoReg::Name
+OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
+ OptoReg::Name warped;
+ if( reg->is_stack() ) { // Stack slot argument?
+ warped = OptoReg::add(_old_SP, reg->reg2stack() );
+ warped = OptoReg::add(warped, C->out_preserve_stack_slots());
+ if( warped >= _in_arg_limit )
+ _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
+ if (!RegMask::can_represent(warped)) {
+ // the compiler cannot represent this method's calling sequence
+ C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
+ return OptoReg::Bad;
+ }
+ return warped;
+ }
+ return OptoReg::as_OptoReg(reg);
+}
+
+//---------------------------compute_old_SP------------------------------------
+OptoReg::Name Compile::compute_old_SP() {
+ int fixed = fixed_slots();
+ int preserve = in_preserve_stack_slots();
+ return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
+}
+
+
+
+#ifdef ASSERT
+void Matcher::verify_new_nodes_only(Node* xroot) {
+ // Make sure that the new graph only references new nodes
+ ResourceMark rm;
+ Unique_Node_List worklist;
+ VectorSet visited(Thread::current()->resource_area());
+ worklist.push(xroot);
+ while (worklist.size() > 0) {
+ Node* n = worklist.pop();
+ visited <<= n->_idx;
+ assert(C->node_arena()->contains(n), "dead node");
+ for (uint j = 0; j < n->req(); j++) {
+ Node* in = n->in(j);
+ if (in != NULL) {
+ assert(C->node_arena()->contains(in), "dead node");
+ if (!visited.test(in->_idx)) {
+ worklist.push(in);
+ }
+ }
+ }
+ }
+}
+#endif
+
+
+//---------------------------match---------------------------------------------
+void Matcher::match( ) {
+ // One-time initialization of some register masks.
+ init_spill_mask( C->root()->in(1) );
+ _return_addr_mask = return_addr();
+#ifdef _LP64
+ // Pointers take 2 slots in 64-bit land
+ _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
+#endif
+
+ // Map a Java-signature return type into return register-value
+ // machine registers for 0, 1 and 2 returned values.
+ const TypeTuple *range = C->tf()->range();
+ if( range->cnt() > TypeFunc::Parms ) { // If not a void function
+ // Get ideal-register return type
+ int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()];
+ // Get machine return register
+ uint sop = C->start()->Opcode();
+ OptoRegPair regs = return_value(ireg, false);
+
+ // And mask for same
+ _return_value_mask = RegMask(regs.first());
+ if( OptoReg::is_valid(regs.second()) )
+ _return_value_mask.Insert(regs.second());
+ }
+
+ // ---------------
+ // Frame Layout
+
+ // Need the method signature to determine the incoming argument types,
+ // because the types determine which registers the incoming arguments are
+ // in, and this affects the matched code.
+ const TypeTuple *domain = C->tf()->domain();
+ uint argcnt = domain->cnt() - TypeFunc::Parms;
+ BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
+ VMRegPair *vm_parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
+ _parm_regs = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
+ _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
+ uint i;
+ for( i = 0; i<argcnt; i++ ) {
+ sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
+ }
+
+ // Pass array of ideal registers and length to USER code (from the AD file)
+ // that will convert this to an array of register numbers.
+ const StartNode *start = C->start();
+ start->calling_convention( sig_bt, vm_parm_regs, argcnt );
+#ifdef ASSERT
+ // Sanity check users' calling convention. Real handy while trying to
+ // get the initial port correct.
+ { for (uint i = 0; i<argcnt; i++) {
+ if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
+ assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
+ _parm_regs[i].set_bad();
+ continue;
+ }
+ VMReg parm_reg = vm_parm_regs[i].first();
+ assert(parm_reg->is_valid(), "invalid arg?");
+ if (parm_reg->is_reg()) {
+ OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
+ assert(can_be_java_arg(opto_parm_reg) ||
+ C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
+ opto_parm_reg == inline_cache_reg(),
+ "parameters in register must be preserved by runtime stubs");
+ }
+ for (uint j = 0; j < i; j++) {
+ assert(parm_reg != vm_parm_regs[j].first(),
+ "calling conv. must produce distinct regs");
+ }
+ }
+ }
+#endif
+
+ // Do some initial frame layout.
+
+ // Compute the old incoming SP (may be called FP) as
+ // OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
+ _old_SP = C->compute_old_SP();
+ assert( is_even(_old_SP), "must be even" );
+
+ // Compute highest incoming stack argument as
+ // _old_SP + out_preserve_stack_slots + incoming argument size.
+ _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
+ assert( is_even(_in_arg_limit), "out_preserve must be even" );
+ for( i = 0; i < argcnt; i++ ) {
+ // Permit args to have no register
+ _calling_convention_mask[i].Clear();
+ if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
+ continue;
+ }
+ // calling_convention returns stack arguments as a count of
+ // slots beyond OptoReg::stack0()/VMRegImpl::stack0. We need to convert this to
+ // the allocator's point of view, taking into account all the
+ // preserve area, locks & pad2.
+
+ OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
+ if( OptoReg::is_valid(reg1))
+ _calling_convention_mask[i].Insert(reg1);
+
+ OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
+ if( OptoReg::is_valid(reg2))
+ _calling_convention_mask[i].Insert(reg2);
+
+ // Saved biased stack-slot register number
+ _parm_regs[i].set_pair(reg2, reg1);
+ }
+
+ // Finally, make sure the incoming arguments take up an even number of
+ // words, in case the arguments or locals need to contain doubleword stack
+ // slots. The rest of the system assumes that stack slot pairs (in
+ // particular, in the spill area) which look aligned will in fact be
+ // aligned relative to the stack pointer in the target machine. Double
+ // stack slots will always be allocated aligned.
+ _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));
+
+ // Compute highest outgoing stack argument as
+ // _new_SP + out_preserve_stack_slots + max(outgoing argument size).
+ _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
+ assert( is_even(_out_arg_limit), "out_preserve must be even" );
+
+ if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
+ // the compiler cannot represent this method's calling sequence
+ C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
+ }
+
+ if (C->failing()) return; // bailed out on incoming arg failure
+
+ // ---------------
+ // Collect roots of matcher trees. Every node for which
+ // _shared[_idx] is cleared is guaranteed to not be shared, and thus
+ // can be a valid interior of some tree.
+ find_shared( C->root() );
+ find_shared( C->top() );
+
+ C->print_method("Before Matching", 2);
+
+ // Swap out to old-space; emptying new-space
+ Arena *old = C->node_arena()->move_contents(C->old_arena());
+
+ // Save debug and profile information for nodes in old space:
+ _old_node_note_array = C->node_note_array();
+ if (_old_node_note_array != NULL) {
+ C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
+ (C->comp_arena(), _old_node_note_array->length(),
+ 0, NULL));
+ }
+
+ // Pre-size the new_node table to avoid the need for range checks.
+ grow_new_node_array(C->unique());
+
+ // Reset node counter so MachNodes start with _idx at 0
+ int nodes = C->unique(); // save value
+ C->set_unique(0);
+
+ // Recursively match trees from old space into new space.
+ // Correct leaves of new-space Nodes; they point to old-space.
+ _visited.Clear(); // Clear visit bits for xform call
+ C->set_cached_top_node(xform( C->top(), nodes ));
+ if (!C->failing()) {
+ Node* xroot = xform( C->root(), 1 );
+ if (xroot == NULL) {
+ Matcher::soft_match_failure(); // recursive matching process failed
+ C->record_method_not_compilable("instruction match failed");
+ } else {
+ // During matching shared constants were attached to C->root()
+ // because xroot wasn't available yet, so transfer the uses to
+ // the xroot.
+ for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
+ Node* n = C->root()->fast_out(j);
+ if (C->node_arena()->contains(n)) {
+ assert(n->in(0) == C->root(), "should be control user");
+ n->set_req(0, xroot);
+ --j;
+ --jmax;
+ }
+ }
+
+ C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);
+#ifdef ASSERT
+ verify_new_nodes_only(xroot);
+#endif
+ }
+ }
+ if (C->top() == NULL || C->root() == NULL) {
+ C->record_method_not_compilable("graph lost"); // %%% cannot happen?
+ }
+ if (C->failing()) {
+ // delete old;
+ old->destruct_contents();
+ return;
+ }
+ assert( C->top(), "" );
+ assert( C->root(), "" );
+ validate_null_checks();
+
+ // Now smoke old-space
+ NOT_DEBUG( old->destruct_contents() );
+
+ // ------------------------
+ // Set up save-on-entry registers
+ Fixup_Save_On_Entry( );
+}
+
+
+//------------------------------Fixup_Save_On_Entry----------------------------
+// The stated purpose of this routine is to take care of save-on-entry
+// registers. However, the overall goal of the Match phase is to convert into
+// machine-specific instructions which have RegMasks to guide allocation.
+// So what this procedure really does is put a valid RegMask on each input
+// to the machine-specific variations of all Return, TailCall and Halt
+// instructions. It also adds edges to define the save-on-entry values (and of
+// course gives them a mask).
+
+static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
+ RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
+ // Do all the pre-defined register masks
+ rms[TypeFunc::Control ] = RegMask::Empty;
+ rms[TypeFunc::I_O ] = RegMask::Empty;
+ rms[TypeFunc::Memory ] = RegMask::Empty;
+ rms[TypeFunc::ReturnAdr] = ret_adr;
+ rms[TypeFunc::FramePtr ] = fp;
+ return rms;
+}
+
+//---------------------------init_first_stack_mask-----------------------------
+// Create the initial stack mask used by values spilling to the stack.
+// Disallow any debug info in outgoing argument areas by setting the
+// initial mask accordingly.
+void Matcher::init_first_stack_mask() {
+
+ // Allocate storage for spill masks as masks for the appropriate load type.
+ RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask)*10);
+ idealreg2spillmask[Op_RegI] = &rms[0];
+ idealreg2spillmask[Op_RegL] = &rms[1];
+ idealreg2spillmask[Op_RegF] = &rms[2];
+ idealreg2spillmask[Op_RegD] = &rms[3];
+ idealreg2spillmask[Op_RegP] = &rms[4];
+ idealreg2debugmask[Op_RegI] = &rms[5];
+ idealreg2debugmask[Op_RegL] = &rms[6];
+ idealreg2debugmask[Op_RegF] = &rms[7];
+ idealreg2debugmask[Op_RegD] = &rms[8];
+ idealreg2debugmask[Op_RegP] = &rms[9];
+
+ OptoReg::Name i;
+
+ // At first, start with the empty mask
+ C->FIRST_STACK_mask().Clear();
+
+ // Add in the incoming argument area
+ OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
+ for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
+ C->FIRST_STACK_mask().Insert(i);
+
+ // Add in all bits past the outgoing argument area
+ guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
+ "must be able to represent all call arguments in reg mask");
+ init = _out_arg_limit;
+ for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
+ C->FIRST_STACK_mask().Insert(i);
+
+ // Finally, set the "infinite stack" bit.
+ C->FIRST_STACK_mask().set_AllStack();
+
+ // Make spill masks. Registers for their class, plus FIRST_STACK_mask.
+ *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
+ idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
+ idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
+ idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
+ idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
+ *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
+ idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
+
+ // Make up debug masks. Any spill slot plus callee-save registers.
+ // Caller-save registers are assumed to be trashable by the various
+ // inline-cache fixup routines.
+ *idealreg2debugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
+ *idealreg2debugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
+ *idealreg2debugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
+ *idealreg2debugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
+ *idealreg2debugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];
+
+ // Prevent stub compilations from attempting to reference
+ // callee-saved registers from debug info
+ bool exclude_soe = !Compile::current()->is_method_compilation();
+
+ for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // registers the caller has to save do not work
+ if( _register_save_policy[i] == 'C' ||
+ _register_save_policy[i] == 'A' ||
+ (_register_save_policy[i] == 'E' && exclude_soe) ) {
+ idealreg2debugmask[Op_RegI]->Remove(i); // Exclude save-on-call
+ idealreg2debugmask[Op_RegL]->Remove(i); // registers from debug
+ idealreg2debugmask[Op_RegF]->Remove(i); // masks
+ idealreg2debugmask[Op_RegD]->Remove(i);
+ idealreg2debugmask[Op_RegP]->Remove(i);
+ }
+ }
+}
+
+//---------------------------is_save_on_entry----------------------------------
+bool Matcher::is_save_on_entry( int reg ) {
+ return
+ _register_save_policy[reg] == 'E' ||
+ _register_save_policy[reg] == 'A' || // Save-on-entry register?
+ // Also save argument registers in the trampolining stubs
+ (C->save_argument_registers() && is_spillable_arg(reg));
+}
+
+//---------------------------Fixup_Save_On_Entry-------------------------------
+void Matcher::Fixup_Save_On_Entry( ) {
+ init_first_stack_mask();
+
+ Node *root = C->root(); // Short name for root
+ // Count number of save-on-entry registers.
+ uint soe_cnt = number_of_saved_registers();
+ uint i;
+
+ // Find the procedure Start Node
+ StartNode *start = C->start();
+ assert( start, "Expect a start node" );
+
+ // Save argument registers in the trampolining stubs
+ if( C->save_argument_registers() )
+ for( i = 0; i < _last_Mach_Reg; i++ )
+ if( is_spillable_arg(i) )
+ soe_cnt++;
+
+ // Input RegMask array shared by all Returns.
+ // The type for doubles and longs has a count of 2, but
+ // there is only 1 returned value
+ uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
+ RegMask *ret_rms = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+ // Returns have 0 or 1 returned values depending on call signature.
+ // Return register is specified by return_value in the AD file.
+ if (ret_edge_cnt > TypeFunc::Parms)
+ ret_rms[TypeFunc::Parms+0] = _return_value_mask;
+
+ // Input RegMask array shared by all Rethrows.
+ uint reth_edge_cnt = TypeFunc::Parms+1;
+ RegMask *reth_rms = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+ // Rethrow takes exception oop only, but in the argument 0 slot.
+ reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
+#ifdef _LP64
+ // Need two slots for ptrs in 64-bit land
+ reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
+#endif
+
+ // Input RegMask array shared by all TailCalls
+ uint tail_call_edge_cnt = TypeFunc::Parms+2;
+ RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // Input RegMask array shared by all TailJumps
+ uint tail_jump_edge_cnt = TypeFunc::Parms+2;
+ RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // TailCalls have 2 returned values (target & moop), whose masks come
+ // from the usual MachNode/MachOper mechanism. Find a sample
+ // TailCall to extract these masks and put the correct masks into
+ // the tail_call_rms array.
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *m = root->in(i)->as_MachReturn();
+ if( m->ideal_Opcode() == Op_TailCall ) {
+ tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
+ tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
+ break;
+ }
+ }
+
+ // TailJumps have 2 returned values (target & ex_oop), whose masks come
+ // from the usual MachNode/MachOper mechanism. Find a sample
+ // TailJump to extract these masks and put the correct masks into
+ // the tail_jump_rms array.
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *m = root->in(i)->as_MachReturn();
+ if( m->ideal_Opcode() == Op_TailJump ) {
+ tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
+ tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
+ break;
+ }
+ }
+
+ // Input RegMask array shared by all Halts
+ uint halt_edge_cnt = TypeFunc::Parms;
+ RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
+
+ // Capture the return input masks into each exit flavor
+ for( i=1; i < root->req(); i++ ) {
+ MachReturnNode *exit = root->in(i)->as_MachReturn();
+ switch( exit->ideal_Opcode() ) {
+ case Op_Return : exit->_in_rms = ret_rms; break;
+ case Op_Rethrow : exit->_in_rms = reth_rms; break;
+ case Op_TailCall : exit->_in_rms = tail_call_rms; break;
+ case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
+ case Op_Halt : exit->_in_rms = halt_rms; break;
+ default : ShouldNotReachHere();
+ }
+ }
+
+ // Next unused projection number from Start.
+ int proj_cnt = C->tf()->domain()->cnt();
+
+ // Do all the save-on-entry registers. Make projections from Start for
+ // them, and give them a use at the exit points. To the allocator, they
+ // look like incoming register arguments.
+ for( i = 0; i < _last_Mach_Reg; i++ ) {
+ if( is_save_on_entry(i) ) {
+
+ // Add the save-on-entry to the mask array
+ ret_rms [ ret_edge_cnt] = mreg2regmask[i];
+ reth_rms [ reth_edge_cnt] = mreg2regmask[i];
+ tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
+ tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
+ // Halts need the SOE registers, but only in the stack as debug info.
+ // A just-prior uncommon-trap or deoptimization will use the SOE regs.
+ halt_rms [ halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];
+
+ Node *mproj;
+
+ // Is this a RegF low half of a RegD? Double up 2 adjacent RegF's
+ // into a single RegD.
+ if( (i&1) == 0 &&
+ _register_save_type[i ] == Op_RegF &&
+ _register_save_type[i+1] == Op_RegF &&
+ is_save_on_entry(i+1) ) {
+ // Add other bit for double
+ ret_rms [ ret_edge_cnt].Insert(OptoReg::Name(i+1));
+ reth_rms [ reth_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
+ halt_rms [ halt_edge_cnt].Insert(OptoReg::Name(i+1));
+ mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
+ proj_cnt += 2; // Skip 2 for doubles
+ }
+ else if( (i&1) == 1 && // Else check for high half of double
+ _register_save_type[i-1] == Op_RegF &&
+ _register_save_type[i ] == Op_RegF &&
+ is_save_on_entry(i-1) ) {
+ ret_rms [ ret_edge_cnt] = RegMask::Empty;
+ reth_rms [ reth_edge_cnt] = RegMask::Empty;
+ tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
+ tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
+ halt_rms [ halt_edge_cnt] = RegMask::Empty;
+ mproj = C->top();
+ }
+ // Is this a RegI low half of a RegL? Double up 2 adjacent RegI's
+ // into a single RegL.
+ else if( (i&1) == 0 &&
+ _register_save_type[i ] == Op_RegI &&
+ _register_save_type[i+1] == Op_RegI &&
+ is_save_on_entry(i+1) ) {
+ // Add other bit for long
+ ret_rms [ ret_edge_cnt].Insert(OptoReg::Name(i+1));
+ reth_rms [ reth_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
+ tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
+ halt_rms [ halt_edge_cnt].Insert(OptoReg::Name(i+1));
+ mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
+ proj_cnt += 2; // Skip 2 for longs
+ }
+ else if( (i&1) == 1 && // Else check for high half of long
+ _register_save_type[i-1] == Op_RegI &&
+ _register_save_type[i ] == Op_RegI &&
+ is_save_on_entry(i-1) ) {
+ ret_rms [ ret_edge_cnt] = RegMask::Empty;
+ reth_rms [ reth_edge_cnt] = RegMask::Empty;
+ tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
+ tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
+ halt_rms [ halt_edge_cnt] = RegMask::Empty;
+ mproj = C->top();
+ } else {
+ // Make a projection for it off the Start
+ mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
+ }
+
+ ret_edge_cnt ++;
+ reth_edge_cnt ++;
+ tail_call_edge_cnt ++;
+ tail_jump_edge_cnt ++;
+ halt_edge_cnt ++;
+
+ // Add a use of the SOE register to all exit paths
+ for( uint j=1; j < root->req(); j++ )
+ root->in(j)->add_req(mproj);
+ } // End of if a save-on-entry register
+ } // End of for all machine registers
+}
+
+//------------------------------init_spill_mask--------------------------------
+void Matcher::init_spill_mask( Node *ret ) {
+ if( idealreg2regmask[Op_RegI] ) return; // One time only init
+
+ OptoReg::c_frame_pointer = c_frame_pointer();
+ c_frame_ptr_mask = c_frame_pointer();
+#ifdef _LP64
+ // pointers are twice as big
+ c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
+#endif
+
+ // Start at OptoReg::stack0()
+ STACK_ONLY_mask.Clear();
+ OptoReg::Name init = OptoReg::stack2reg(0);
+ // STACK_ONLY_mask is all stack bits
+ OptoReg::Name i;
+ for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
+ STACK_ONLY_mask.Insert(i);
+ // Also set the "infinite stack" bit.
+ STACK_ONLY_mask.set_AllStack();
+
+ // Copy the register names over into the shared world
+ for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
+ // SharedInfo::regName[i] = regName[i];
+ // Handy RegMasks per machine register
+ mreg2regmask[i].Insert(i);
+ }
+
+ // Grab the Frame Pointer
+ Node *fp = ret->in(TypeFunc::FramePtr);
+ Node *mem = ret->in(TypeFunc::Memory);
+ const TypePtr* atp = TypePtr::BOTTOM;
+ // Share frame pointer while making spill ops
+ set_shared(fp);
+
+ // Compute generic short-offset Loads
+ MachNode *spillI = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
+ MachNode *spillL = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
+ MachNode *spillF = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
+ MachNode *spillD = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
+ MachNode *spillP = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
+ assert(spillI != NULL && spillL != NULL && spillF != NULL &&
+ spillD != NULL && spillP != NULL, "");
+
+ // Get the ADLC notion of the right regmask, for each basic type.
+ idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
+ idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
+ idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
+ idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
+ idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
+}
+
+#ifdef ASSERT
+static void match_alias_type(Compile* C, Node* n, Node* m) {
+ if (!VerifyAliases) return; // do not go looking for trouble by default
+ const TypePtr* nat = n->adr_type();
+ const TypePtr* mat = m->adr_type();
+ int nidx = C->get_alias_index(nat);
+ int midx = C->get_alias_index(mat);
+ // Detune the assert for cases like (AndI 0xFF (LoadB p)).
+ if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
+ for (uint i = 1; i < n->req(); i++) {
+ Node* n1 = n->in(i);
+ const TypePtr* n1at = n1->adr_type();
+ if (n1at != NULL) {
+ nat = n1at;
+ nidx = C->get_alias_index(n1at);
+ }
+ }
+ }
+ // %%% Kludgery. Instead, fix ideal adr_type methods for all these cases:
+ if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
+ switch (n->Opcode()) {
+ case Op_PrefetchRead:
+ case Op_PrefetchWrite:
+ nidx = Compile::AliasIdxRaw;
+ nat = TypeRawPtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
+ switch (n->Opcode()) {
+ case Op_ClearArray:
+ midx = Compile::AliasIdxRaw;
+ mat = TypeRawPtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
+ switch (n->Opcode()) {
+ case Op_Return:
+ case Op_Rethrow:
+ case Op_Halt:
+ case Op_TailCall:
+ case Op_TailJump:
+ nidx = Compile::AliasIdxBot;
+ nat = TypePtr::BOTTOM;
+ break;
+ }
+ }
+ if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
+ switch (n->Opcode()) {
+ case Op_StrComp:
+ case Op_MemBarVolatile:
+ case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
+ nidx = Compile::AliasIdxTop;
+ nat = NULL;
+ break;
+ }
+ }
+ if (nidx != midx) {
+ if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
+ tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
+ n->dump();
+ m->dump();
+ }
+ assert(C->subsume_loads() && C->must_alias(nat, midx),
+ "must not lose alias info when matching");
+ }
+}
+#endif
+
+
+//------------------------------MStack-----------------------------------------
+// State and MStack class used in xform() and find_shared() iterative methods.
+enum Node_State { Pre_Visit, // node has to be pre-visited
+ Visit, // visit node
+ Post_Visit, // post-visit node
+ Alt_Post_Visit // alternative post-visit path
+ };
+
+class MStack: public Node_Stack {
+ public:
+ MStack(int size) : Node_Stack(size) { }
+
+ void push(Node *n, Node_State ns) {
+ Node_Stack::push(n, (uint)ns);
+ }
+ void push(Node *n, Node_State ns, Node *parent, int indx) {
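+ // Push the parent/index entry first and the child node/state on top of it;
+ // parent() pops the child entry to expose the parent again.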
+ ++_inode_top;
+ if ((_inode_top + 1) >= _inode_max) grow();
+ _inode_top->node = parent;
+ _inode_top->indx = (uint)indx;
+ ++_inode_top;
+ _inode_top->node = n;
+ _inode_top->indx = (uint)ns;
+ }
+ Node *parent() {
+ pop();
+ return node();
+ }
+ Node_State state() const {
+ return (Node_State)index();
+ }
+ void set_state(Node_State ns) {
+ set_index((uint)ns);
+ }
+};
+
+
+//------------------------------xform------------------------------------------
+// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
+// Node in new-space. Given a new-space Node, recursively walk his children.
+Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
+Node *Matcher::xform( Node *n, int max_stack ) {
+ // Use one stack to keep both: child's node/state and parent's node/index
+ MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
+ mstack.push(n, Visit, NULL, -1); // set NULL as parent to indicate root
+
+ while (mstack.is_nonempty()) {
+ n = mstack.node(); // Leave node on stack
+ Node_State nstate = mstack.state();
+ if (nstate == Visit) {
+ mstack.set_state(Post_Visit);
+ Node *oldn = n;
+ // Old-space or new-space check
+ if (!C->node_arena()->contains(n)) {
+ // Old space!
+ Node* m;
+ if (has_new_node(n)) { // Not yet Label/Reduced
+ m = new_node(n);
+ } else {
+ if (!is_dontcare(n)) { // Matcher can match this guy
+ // Calls match special. They match alone with no children.
+ // Their children, the incoming arguments, match normally.
+ m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
+ if (C->failing()) return NULL;
+ if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
+ } else { // Nothing the matcher cares about
+ if( n->is_Proj() && n->in(0)->is_Multi()) { // Projections?
+ // Convert to machine-dependent projection
+ m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
+ if (m->in(0) != NULL) // m might be top
+ collect_null_checks(m);
+ } else { // Else just a regular 'ol guy
+ m = n->clone(); // So just clone into new-space
+ // Def-Use edges will be added incrementally as Uses
+ // of this node are matched.
+ assert(m->outcnt() == 0, "no Uses of this clone yet");
+ }
+ }
+
+ set_new_node(n, m); // Map old to new
+ if (_old_node_note_array != NULL) {
+ Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
+ n->_idx);
+ C->set_node_notes_at(m->_idx, nn);
+ }
+ debug_only(match_alias_type(C, n, m));
+ }
+ n = m; // n is now a new-space node
+ mstack.set_node(n);
+ }
+
+ // New space!
+ if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
+
+ int i;
+ // Put precedence edges on stack first (match them last).
+ for (i = oldn->req(); (uint)i < oldn->len(); i++) {
+ Node *m = oldn->in(i);
+ if (m == NULL) break;
+ // set -1 to call add_prec() instead of set_req() during Step1
+ mstack.push(m, Visit, n, -1);
+ }
+
+ // For constant debug info, I'd rather have unmatched constants.
+ int cnt = n->req();
+ JVMState* jvms = n->jvms();
+ int debug_cnt = jvms ? jvms->debug_start() : cnt;
+
+ // Now do only debug info. Clone constants rather than matching.
+ // Constants are represented directly in the debug info without
+ // the need for executable machine instructions.
+ // Monitor boxes are also represented directly.
+ for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
+ Node *m = n->in(i); // Get input
+ int op = m->Opcode();
+ assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
+ if( op == Op_ConI || op == Op_ConP ||
+ op == Op_ConF || op == Op_ConD || op == Op_ConL
+ // || op == Op_BoxLock // %%%% enable this and remove (+++) in chaitin.cpp
+ ) {
+ m = m->clone();
+ mstack.push(m, Post_Visit, n, i); // Don't need to visit
+ mstack.push(m->in(0), Visit, m, 0);
+ } else {
+ mstack.push(m, Visit, n, i);
+ }
+ }
+
+ // And now walk his children, and convert his inputs to new-space.
+ for( ; i >= 0; --i ) { // For all normal inputs do
+ Node *m = n->in(i); // Get input
+ if(m != NULL)
+ mstack.push(m, Visit, n, i);
+ }
+
+ }
+ else if (nstate == Post_Visit) {
+ // Set xformed input
+ Node *p = mstack.parent();
+ if (p != NULL) { // root doesn't have parent
+ int i = (int)mstack.index();
+ if (i >= 0)
+ p->set_req(i, n); // required input
+ else if (i == -1)
+ p->add_prec(n); // precedence input
+ else
+ ShouldNotReachHere();
+ }
+ mstack.pop(); // remove processed node from stack
+ }
+ else {
+ ShouldNotReachHere();
+ }
+ } // while (mstack.is_nonempty())
+ return n; // Return new-space Node
+}
+
+//------------------------------warp_outgoing_stk_arg------------------------
+OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
+ // Convert outgoing argument location to a pre-biased stack offset
+ if (reg->is_stack()) {
+ OptoReg::Name warped = reg->reg2stack();
+ // Adjust the stack slot offset to be the register number used
+ // by the allocator.
+ warped = OptoReg::add(begin_out_arg_area, warped);
+ // Keep track of the largest numbered stack slot used for an arg.
+ // Largest used slot per call-site indicates the amount of stack
+ // that is killed by the call.
+ if( warped >= out_arg_limit_per_call )
+ out_arg_limit_per_call = OptoReg::add(warped,1);
+ if (!RegMask::can_represent(warped)) {
+ C->record_method_not_compilable_all_tiers("unsupported calling sequence");
+ return OptoReg::Bad;
+ }
+ return warped;
+ }
+ return OptoReg::as_OptoReg(reg);
+}
+
+
+//------------------------------match_sfpt-------------------------------------
+// Helper function to match call instructions. Calls match special.
+// They match alone with no children. Their children, the incoming
+// arguments, match normally.
+MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
+ MachSafePointNode *msfpt = NULL;
+ MachCallNode *mcall = NULL;
+ uint cnt;
+ // Split out case for SafePoint vs Call
+ CallNode *call;
+ const TypeTuple *domain;
+ ciMethod* method = NULL;
+ if( sfpt->is_Call() ) {
+ call = sfpt->as_Call();
+ domain = call->tf()->domain();
+ cnt = domain->cnt();
+
+ // Match just the call, nothing else
+ MachNode *m = match_tree(call);
+ if (C->failing()) return NULL;
+ if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }
+
+ // Copy data from the Ideal SafePoint to the machine version
+ mcall = m->as_MachCall();
+
+ mcall->set_tf( call->tf());
+ mcall->set_entry_point(call->entry_point());
+ mcall->set_cnt( call->cnt());
+
+ if( mcall->is_MachCallJava() ) {
+ MachCallJavaNode *mcall_java = mcall->as_MachCallJava();
+ const CallJavaNode *call_java = call->as_CallJava();
+ method = call_java->method();
+ mcall_java->_method = method;
+ mcall_java->_bci = call_java->_bci;
+ mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
+ if( mcall_java->is_MachCallStaticJava() )
+ mcall_java->as_MachCallStaticJava()->_name =
+ call_java->as_CallStaticJava()->_name;
+ if( mcall_java->is_MachCallDynamicJava() )
+ mcall_java->as_MachCallDynamicJava()->_vtable_index =
+ call_java->as_CallDynamicJava()->_vtable_index;
+ }
+ else if( mcall->is_MachCallRuntime() ) {
+ mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
+ }
+ msfpt = mcall;
+ }
+ // This is a non-call safepoint
+ else {
+ call = NULL;
+ domain = NULL;
+ MachNode *mn = match_tree(sfpt);
+ if (C->failing()) return NULL;
+ msfpt = mn->as_MachSafePoint();
+ cnt = TypeFunc::Parms;
+ }
+
+ // Advertise the correct memory effects (for anti-dependence computation).
+ msfpt->set_adr_type(sfpt->adr_type());
+
+ // Allocate a private array of RegMasks. These RegMasks are not shared.
+ msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
+ // Empty them all.
+ memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );
+
+ // Do all the pre-defined non-Empty register masks
+ msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
+ msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;
+
+ // Place where the first outgoing argument can possibly be put.
+ OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
+ assert( is_even(begin_out_arg_area), "" );
+ // Compute max outgoing register number per call site.
+ OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
+ // Calls to C may hammer extra stack slots above and beyond any arguments.
+ // These are usually backing store for register arguments for varargs.
+ if( call != NULL && call->is_CallRuntime() )
+ out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
+
+
+ // Do the normal argument list (parameters) register masks
+ int argcnt = cnt - TypeFunc::Parms;
+ if( argcnt > 0 ) { // Skip it all if we have no args
+ BasicType *sig_bt = NEW_RESOURCE_ARRAY( BasicType, argcnt );
+ VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
+ int i;
+ for( i = 0; i < argcnt; i++ ) {
+ sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
+ }
+ // V-call to pick proper calling convention
+ call->calling_convention( sig_bt, parm_regs, argcnt );
+
+#ifdef ASSERT
+ // Sanity check users' calling convention. Really handy during
+ // the initial porting effort. Fairly expensive otherwise.
+ { for (int i = 0; i<argcnt; i++) {
+ if( !parm_regs[i].first()->is_valid() &&
+ !parm_regs[i].second()->is_valid() ) continue;
+ VMReg reg1 = parm_regs[i].first();
+ VMReg reg2 = parm_regs[i].second();
+ for (int j = 0; j < i; j++) {
+ if( !parm_regs[j].first()->is_valid() &&
+ !parm_regs[j].second()->is_valid() ) continue;
+ VMReg reg3 = parm_regs[j].first();
+ VMReg reg4 = parm_regs[j].second();
+ if( !reg1->is_valid() ) {
+ assert( !reg2->is_valid(), "valid halvsies" );
+ } else if( !reg3->is_valid() ) {
+ assert( !reg4->is_valid(), "valid halvsies" );
+ } else {
+ assert( reg1 != reg2, "calling conv. must produce distinct regs");
+ assert( reg1 != reg3, "calling conv. must produce distinct regs");
+ assert( reg1 != reg4, "calling conv. must produce distinct regs");
+ assert( reg2 != reg3, "calling conv. must produce distinct regs");
+ assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
+ assert( reg3 != reg4, "calling conv. must produce distinct regs");
+ }
+ }
+ }
+ }
+#endif
+
+ // Visit each argument. Compute its outgoing register mask.
+ // Return results now can have 2 bits returned.
+ // Compute max over all outgoing arguments both per call-site
+ // and over the entire method.
+ for( i = 0; i < argcnt; i++ ) {
+ // Address of incoming argument mask to fill in
+ RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
+ if( !parm_regs[i].first()->is_valid() &&
+ !parm_regs[i].second()->is_valid() ) {
+ continue; // Avoid Halves
+ }
+ // Grab first register, adjust stack slots and insert in mask.
+ OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
+ if (OptoReg::is_valid(reg1))
+ rm->Insert( reg1 );
+ // Grab second register (if any), adjust stack slots and insert in mask.
+ OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
+ if (OptoReg::is_valid(reg2))
+ rm->Insert( reg2 );
+ } // End of for all arguments
+
+ // Compute number of stack slots needed to restore stack in case of
+ // Pascal-style argument popping.
+ mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
+ }
+
+ // Compute the max stack slot killed by any call. These will not be
+ // available for debug info, and will be used to adjust FIRST_STACK_mask
+ // after all call sites have been visited.
+ if( _out_arg_limit < out_arg_limit_per_call)
+ _out_arg_limit = out_arg_limit_per_call;
+
+ if (mcall) {
+ // Kill the outgoing argument area, including any non-argument holes and
+ // any legacy C-killed slots. Use Fat-Projections to do the killing.
+ // Since the max-per-method covers the max-per-call-site and debug info
+ // is excluded on the max-per-method basis, debug info cannot land in
+ // this killed area.
+ uint r_cnt = mcall->tf()->range()->cnt();
+ MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
+ if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
+ C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
+ } else {
+ for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
+ proj->_rout.Insert(OptoReg::Name(i));
+ }
+ if( proj->_rout.is_NotEmpty() )
+ _proj_list.push(proj);
+ }
+ // Transfer the safepoint information from the call to the mcall
+ // Move the JVMState list
+ msfpt->set_jvms(sfpt->jvms());
+ for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
+ jvms->set_map(sfpt);
+ }
+
+ // Debug inputs begin just after the last incoming parameter
+ assert( (mcall == NULL) || (mcall->jvms() == NULL) ||
+ (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" );
+
+ // Move the OopMap
+ msfpt->_oop_map = sfpt->_oop_map;
+
+ // Registers killed by the call are set in the local scheduling pass
+ // of Global Code Motion.
+ return msfpt;
+}
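+// Illustrative sketch, not part of the original sources: the per-argument
+// outgoing masks built above are just bitsets of register/stack-slot names,
+// and the method-wide _out_arg_limit is a running maximum over call sites.
+// ToyRegPair and ToyRegMask are stand-ins for VMRegPair and RegMask.
+#if 0
+#include <bitset>
+#include <algorithm>
+struct ToyRegPair { int first, second; };        // -1 encodes an invalid half
+typedef std::bitset<128> ToyRegMask;
+static int fill_outgoing_masks(const ToyRegPair* regs, int argcnt,
+                               ToyRegMask* masks, int method_limit) {
+  int per_call_limit = 0;
+  for (int i = 0; i < argcnt; i++) {
+    if (regs[i].first >= 0) {
+      masks[i].set(regs[i].first);               // first register half
+      per_call_limit = std::max(per_call_limit, regs[i].first + 1);
+    }
+    if (regs[i].second >= 0) {
+      masks[i].set(regs[i].second);              // second half, if any
+      per_call_limit = std::max(per_call_limit, regs[i].second + 1);
+    }
+  }
+  return std::max(method_limit, per_call_limit); // new method-wide limit
+}
+#endif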
+
+//---------------------------match_tree----------------------------------------
+// Match an Ideal Node DAG - turn it into a tree; Label & Reduce. Used as part
+// of the wholesale conversion from Ideal to Mach Nodes. Also used for
+// making GotoNodes while building the CFG and in init_spill_mask() to identify
+// a Load's result RegMask for memoization in idealreg2regmask[]
+MachNode *Matcher::match_tree( const Node *n ) {
+ assert( n->Opcode() != Op_Phi, "cannot match" );
+ assert( !n->is_block_start(), "cannot match" );
+ // Set the mark for all locally allocated State objects.
+ // When this call returns, the _states_arena arena will be reset
+ // freeing all State objects.
+ ResourceMark rm( &_states_arena );
+
+ LabelRootDepth = 0;
+
+ // StoreNodes require their Memory input to match any LoadNodes
+ Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
+
+ // State object for root node of match tree
+ // Allocate it on _states_arena - stack allocation can cause stack overflow.
+ State *s = new (&_states_arena) State;
+ s->_kids[0] = NULL;
+ s->_kids[1] = NULL;
+ s->_leaf = (Node*)n;
+ // Label the input tree, allocating labels from top-level arena
+ Label_Root( n, s, n->in(0), mem );
+ if (C->failing()) return NULL;
+
+ // The minimum cost match for the whole tree is found at the root State
+ uint mincost = max_juint;
+ uint cost = max_juint;
+ uint i;
+ for( i = 0; i < NUM_OPERANDS; i++ ) {
+ if( s->valid(i) && // valid entry and
+ s->_cost[i] < cost && // low cost and
+ s->_rule[i] >= NUM_OPERANDS ) // not an operand
+ cost = s->_cost[mincost=i];
+ }
+ if (mincost == max_juint) {
+#ifndef PRODUCT
+ tty->print("No matching rule for:");
+ s->dump();
+#endif
+ Matcher::soft_match_failure();
+ return NULL;
+ }
+ // Reduce input tree based upon the state labels to machine Nodes
+ MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
+#ifdef ASSERT
+ _old2new_map.map(n->_idx, m);
+#endif
+
+ // Add any Matcher-ignored edges
+ uint cnt = n->req();
+ uint start = 1;
+ if( mem != (Node*)1 ) start = MemNode::Memory+1;
+ if( n->Opcode() == Op_AddP ) {
+ assert( mem == (Node*)1, "" );
+ start = AddPNode::Base+1;
+ }
+ for( i = start; i < cnt; i++ ) {
+ if( !n->match_edge(i) ) {
+ if( i < m->req() )
+ m->ins_req( i, n->in(i) );
+ else
+ m->add_req( n->in(i) );
+ }
+ }
+
+ return m;
+}
+
+
+//------------------------------match_into_reg---------------------------------
+// Choose to either match this Node in a register or part of the current
+// match tree. Return true for requiring a register and false for matching
+// as part of the current match tree.
+static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
+
+ const Type *t = m->bottom_type();
+
+ if( t->singleton() ) {
+ // Never force constants into registers. Allow them to match as
+ // constants or registers. Copies of the same value will share
+ // the same register. See find_shared_constant.
+ return false;
+ } else { // Not a constant
+ // Stop recursion if they have different Controls.
+ // Slot 0 of constants is not really a Control.
+ if( control && m->in(0) && control != m->in(0) ) {
+
+ // Actually, we can live with the most conservative control we
+ // find, if it post-dominates the others. This allows us to
+ // pick up load/op/store trees where the load can float a little
+ // above the store.
+ Node *x = control;
+ const uint max_scan = 6; // Arbitrary scan cutoff
+ uint j;
+ for( j=0; j<max_scan; j++ ) {
+ if( x->is_Region() ) // Bail out at merge points
+ return true;
+ x = x->in(0);
+ if( x == m->in(0) ) // Does 'control' post-dominate
+ break; // m->in(0)? If so, we can use it
+ }
+ if( j == max_scan ) // No post-domination before scan end?
+ return true; // Then break the match tree up
+ }
+ }
+
+ // Not forcibly cloning. If shared, put it into a register.
+ return shared;
+}
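+// Illustrative sketch, not part of the original sources: the bounded scan in
+// match_into_reg() walks at most max_scan steps up a control chain and bails
+// out at merge points. ToyNode is a stand-in for the real Node class.
+#if 0
+struct ToyNode { ToyNode* ctrl; bool is_region; };
+static bool postdominates(const ToyNode* from, const ToyNode* target,
+                          unsigned max_scan = 6) {
+  for (unsigned j = 0; j < max_scan; j++) {
+    if (from->is_region) return false;   // give up at a merge point
+    from = from->ctrl;                   // step up the control chain
+    if (from == target) return true;     // reached 'target': OK to subsume
+  }
+  return false;                          // no answer within the scan budget
+}
+#endif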
+
+
+//------------------------------Instruction Selection--------------------------
+// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
+// ideal nodes to machine instructions. Trees are delimited by shared Nodes,
+// things the Matcher does not match (e.g., Memory), and things with different
+// Controls (hence forced into different blocks). We pass in the Control
+// selected for this entire State tree.
+
+// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
+// Store and the Load must have identical Memories (as well as identical
+// pointers). Since the Matcher does not have anything for Memory (and
+// does not handle DAGs), I have to match the Memory input myself. If the
+// Tree root is a Store, I require all Loads to have the identical memory.
+Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
+ // Since Label_Root is a recursive function, it's possible that we might run
+ // out of stack space. See bugs 6272980 & 6227033 for more info.
+ LabelRootDepth++;
+ if (LabelRootDepth > MaxLabelRootDepth) {
+ C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
+ return NULL;
+ }
+ uint care = 0; // Edges matcher cares about
+ uint cnt = n->req();
+ uint i = 0;
+
+ // Examine children for memory state
+ // Can only subsume a child into your match-tree if that child's memory state
+ // is not modified along the path to another input.
+ // It is unsafe even if the other inputs are separate roots.
+ Node *input_mem = NULL;
+ for( i = 1; i < cnt; i++ ) {
+ if( !n->match_edge(i) ) continue;
+ Node *m = n->in(i); // Get ith input
+ assert( m, "expect non-null children" );
+ if( m->is_Load() ) {
+ if( input_mem == NULL ) {
+ input_mem = m->in(MemNode::Memory);
+ } else if( input_mem != m->in(MemNode::Memory) ) {
+ input_mem = NodeSentinel;
+ }
+ }
+ }
+
+ for( i = 1; i < cnt; i++ ){// For my children
+ if( !n->match_edge(i) ) continue;
+ Node *m = n->in(i); // Get ith input
+ // Allocate states out of a private arena
+ State *s = new (&_states_arena) State;
+ svec->_kids[care++] = s;
+ assert( care <= 2, "binary only for now" );
+
+ // Recursively label the State tree.
+ s->_kids[0] = NULL;
+ s->_kids[1] = NULL;
+ s->_leaf = m;
+
+ // Check for leaves of the State Tree; things that cannot be a part of
+ // the current tree. If it finds any, that value is matched as a
+ // register operand. If not, then the normal matching is used.
+ if( match_into_reg(n, m, control, i, is_shared(m)) ||
+ //
+ // Stop recursion if this is a LoadNode and the root of this tree is a
+ // StoreNode and the load & store have different memories.
+ ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
+ // Can NOT include the match of a subtree when its memory state
+ // is used by any of the other subtrees
+ (input_mem == NodeSentinel) ) {
+#ifndef PRODUCT
+ // Print when we exclude matching due to different memory states at input-loads
+ if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
+ && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
+ tty->print_cr("invalid input_mem");
+ }
+#endif
+ // Switch to a register-only opcode; this value must be in a register
+ // and cannot be subsumed as part of a larger instruction.
+ s->DFA( m->ideal_reg(), m );
+
+ } else {
+ // If match tree has no control and we do, adopt it for entire tree
+ if( control == NULL && m->in(0) != NULL && m->req() > 1 )
+ control = m->in(0); // Pick up control
+ // Else match as a normal part of the match tree.
+ control = Label_Root(m,s,control,mem);
+ if (C->failing()) return NULL;
+ }
+ }
+
+
+ // Call DFA to match this node, and return
+ svec->DFA( n->Opcode(), n );
+
+#ifdef ASSERT
+ uint x;
+ for( x = 0; x < _LAST_MACH_OPER; x++ )
+ if( svec->valid(x) )
+ break;
+
+ if (x >= _LAST_MACH_OPER) {
+ n->dump();
+ svec->dump();
+ assert( false, "bad AD file" );
+ }
+#endif
+ return control;
+}
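+// Illustrative sketch, not part of the original sources: Label_Root is a
+// bottom-up (BURS-style) labeling pass. The toy below labels a binary tree
+// with one cheapest cost per node; the real pass instead consults the
+// ADLC-generated DFA and records a cost per (operand, rule) entry.
+#if 0
+#include <cstddef>
+struct ToyState { int rule; unsigned cost; };
+struct ToyTree  { int opcode; ToyTree* kid[2]; ToyState label; };
+static void toy_label(ToyTree* t) {
+  unsigned cost = 1;                        // charge one unit for this node
+  for (int i = 0; i < 2; i++) {
+    if (t->kid[i] == NULL) continue;
+    toy_label(t->kid[i]);                   // label children first
+    cost += t->kid[i]->label.cost;          // accumulate their cost
+  }
+  t->label.rule = t->opcode;                // stand-in for a selected rule
+  t->label.cost = cost;
+}
+#endif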
+
+
+// Con nodes reduced using the same rule can share their MachNode
+// which reduces the number of copies of a constant in the final
+// program. The register allocator is free to split uses later to
+// split live ranges.
+MachNode* Matcher::find_shared_constant(Node* leaf, uint rule) {
+ if (!leaf->is_Con()) return NULL;
+
+ // See if this Con has already been reduced using this rule.
+ if (_shared_constants.Size() <= leaf->_idx) return NULL;
+ MachNode* last = (MachNode*)_shared_constants.at(leaf->_idx);
+ if (last != NULL && rule == last->rule()) {
+ // Get the new space root.
+ Node* xroot = new_node(C->root());
+ if (xroot == NULL) {
+ // This shouldn't happen given the order of matching.
+ return NULL;
+ }
+
+ // Shared constants need to have their control be root so they
+ // can be scheduled properly.
+ Node* control = last->in(0);
+ if (control != xroot) {
+ if (control == NULL || control == C->root()) {
+ last->set_req(0, xroot);
+ } else {
+ assert(false, "unexpected control");
+ return NULL;
+ }
+ }
+ return last;
+ }
+ return NULL;
+}
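+// Illustrative sketch, not part of the original sources: constant sharing is
+// a memo keyed by the constant's node index, hit only when the same reduction
+// rule is requested again. A toy memo over a plain array:
+#if 0
+#include <cstddef>
+struct ToyMach { int rule; };
+static ToyMach* toy_find_shared_con(ToyMach** memo, size_t memo_len,
+                                    size_t idx, int rule) {
+  if (idx >= memo_len) return NULL;                     // never reduced before
+  ToyMach* last = memo[idx];
+  if (last != NULL && last->rule == rule) return last;  // reuse prior node
+  return NULL;                                          // different rule: no sharing
+}
+#endif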
+
+
+//------------------------------ReduceInst-------------------------------------
+// Reduce a State tree (with given Control) into a tree of MachNodes.
+// This routine (and its cohort ReduceOper) converts Ideal Nodes into
+// complicated machine Nodes. Each MachNode covers some tree of Ideal Nodes.
+// Each MachNode has a number of complicated MachOper operands; each
+// MachOper also covers a further tree of Ideal Nodes.
+
+// The root of the Ideal match tree is always an instruction, so we enter
+// the recursion here. After building the MachNode, we need to recurse
+// the tree checking for these cases:
+// (1) Child is an instruction -
+// Build the instruction (recursively), add it as an edge.
+// Build a simple operand (register) to hold the result of the instruction.
+// (2) Child is an interior part of an instruction -
+// Skip over it (do nothing)
+// (3) Child is the start of an operand -
+// Build the operand, place it inside the instruction
+// Call ReduceOper.
+MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
+ assert( rule >= NUM_OPERANDS, "called with operand rule" );
+
+ MachNode* shared_con = find_shared_constant(s->_leaf, rule);
+ if (shared_con != NULL) {
+ return shared_con;
+ }
+
+ // Build the object to represent this state & prepare for recursive calls
+ MachNode *mach = s->MachNodeGenerator( rule, C );
+ mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
+ assert( mach->_opnds[0] != NULL, "Missing result operand" );
+ Node *leaf = s->_leaf;
+ // Check for instruction or instruction chain rule
+ if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
+ // Instruction
+ mach->add_req( leaf->in(0) ); // Set initial control
+ // Reduce interior of complex instruction
+ ReduceInst_Interior( s, rule, mem, mach, 1 );
+ } else {
+ // Instruction chain rules are data-dependent on their inputs
+ mach->add_req(0); // Set initial control to none
+ ReduceInst_Chain_Rule( s, rule, mem, mach );
+ }
+
+ // If a Memory was used, insert a Memory edge
+ if( mem != (Node*)1 )
+ mach->ins_req(MemNode::Memory,mem);
+
+ // If the _leaf is an AddP, insert the base edge
+ if( leaf->Opcode() == Op_AddP )
+ mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
+
+ uint num_proj = _proj_list.size();
+
+ // Perform any 1-to-many expansions required
+ MachNode *ex = mach->Expand(s,_proj_list);
+ if( ex != mach ) {
+ assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
+ if( ex->in(1)->is_Con() )
+ ex->in(1)->set_req(0, C->root());
+ // Remove old node from the graph
+ for( uint i=0; i<mach->req(); i++ ) {
+ mach->set_req(i,NULL);
+ }
+ }
+
+ // PhaseChaitin::fixup_spills will sometimes generate spill code
+ // via the matcher. By that time, nodes have been wired into the CFG,
+ // and any further nodes generated by expand rules will be left hanging
+ // in space, and will not get emitted as output code. Catch this.
+ // Also, catch any new register allocation constraints ("projections")
+ // generated belatedly during spill code generation.
+ if (_allocation_started) {
+ guarantee(ex == mach, "no expand rules during spill generation");
+ guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
+ }
+
+ if (leaf->is_Con()) {
+ // Record the con for sharing
+ _shared_constants.map(leaf->_idx, ex);
+ }
+
+ return ex;
+}
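+// Illustrative sketch, not part of the original sources: after labeling,
+// reduction revisits the tree top-down and emits one machine node per covered
+// subtree, recursing into children (compare cases (1)-(3) in the comment
+// before ReduceInst). ToyLabeled mirrors the toy labeling sketch above.
+#if 0
+#include <cstdio>
+#include <cstddef>
+struct ToyLabeled { int rule; ToyLabeled* kid[2]; };
+static void toy_reduce(const ToyLabeled* s, int depth) {
+  std::printf("%*semit rule %d\n", depth * 2, "", s->rule);
+  for (int i = 0; i < 2; i++)
+    if (s->kid[i] != NULL)
+      toy_reduce(s->kid[i], depth + 1);   // children become operands/inputs
+}
+#endif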
+
+void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
+ // 'op' is what I am expecting to receive
+ int op = _leftOp[rule];
+ // Operand type to catch child's result
+ // This is what my child will give me.
+ int opnd_class_instance = s->_rule[op];
+ // Choose between operand class or not.
+ // This is what I will receive.
+ int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
+ // New rule for child. Chase operand classes to get the actual rule.
+ int newrule = s->_rule[catch_op];
+
+ if( newrule < NUM_OPERANDS ) {
+ // Chain from operand or operand class, may be output of shared node
+ assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
+ "Bad AD file: Instruction chain rule must chain from operand");
+ // Insert operand into array of operands for this instruction
+ mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );
+
+ ReduceOper( s, newrule, mem, mach );
+ } else {
+ // Chain from the result of an instruction
+ assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
+ mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst(s, newrule, mem1) );
+ }
+ return;
+}
+
+
+uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
+ if( s->_leaf->is_Load() ) {
+ Node *mem2 = s->_leaf->in(MemNode::Memory);
+ assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
+ mem = mem2;
+ }
+ if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
+ if( mach->in(0) == NULL )
+ mach->set_req(0, s->_leaf->in(0));
+ }
+
+ // Now recursively walk the state tree & add operand list.
+ for( uint i=0; i<2; i++ ) { // binary tree
+ State *newstate = s->_kids[i];
+ if( newstate == NULL ) break; // Might only have 1 child
+ // 'op' is what I am expecting to receive
+ int op;
+ if( i == 0 ) {
+ op = _leftOp[rule];
+ } else {
+ op = _rightOp[rule];
+ }
+ // Operand type to catch child's result
+ // This is what my child will give me.
+ int opnd_class_instance = newstate->_rule[op];
+ // Choose between operand class or not.
+ // This is what I will receive.
+ int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
+ // New rule for child. Chase operand classes to get the actual rule.
+ int newrule = newstate->_rule[catch_op];
+
+ if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
+ // Operand/operandClass
+ // Insert operand into array of operands for this instruction
+ mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
+ ReduceOper( newstate, newrule, mem, mach );
+
+ } else { // Child is internal operand or new instruction
+ if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
+ // internal operand --> call ReduceInst_Interior
+ // Interior of complex instruction. Do nothing but recurse.
+ num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
+ } else {
+ // instruction --> call build operand( ) to catch result
+ // --> ReduceInst( newrule )
+ mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
+ }
+ }
+ assert( mach->_opnds[num_opnds-1], "" );
+ }
+ return num_opnds;
+}
+
+// This routine walks the interior of possible complex operands.
+// At each point we check our children in the match tree:
+// (1) No children -
+// We are a leaf; add _leaf field as an input to the MachNode
+// (2) Child is an internal operand -
+// Skip over it ( do nothing )
+// (3) Child is an instruction -
+// Call ReduceInst recursively and add the
+// instruction as an input to the MachNode
+void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
+ assert( rule < _LAST_MACH_OPER, "called with operand rule" );
+ State *kid = s->_kids[0];
+ assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );
+
+ // Leaf? And not subsumed?
+ if( kid == NULL && !_swallowed[rule] ) {
+ mach->add_req( s->_leaf ); // Add leaf pointer
+ return; // Bail out
+ }
+
+ if( s->_leaf->is_Load() ) {
+ assert( mem == (Node*)1, "multiple Memories being matched at once?" );
+ mem = s->_leaf->in(MemNode::Memory);
+ }
+ if( s->_leaf->in(0) && s->_leaf->req() > 1) {
+ if( !mach->in(0) )
+ mach->set_req(0,s->_leaf->in(0));
+ else {
+ assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
+ }
+ }
+
+ for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) { // binary tree
+ int newrule;
+ if( i == 0 )
+ newrule = kid->_rule[_leftOp[rule]];
+ else
+ newrule = kid->_rule[_rightOp[rule]];
+
+ if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
+ // Internal operand; recurse but do nothing else
+ ReduceOper( kid, newrule, mem, mach );
+
+ } else { // Child is a new instruction
+ // Reduce the instruction, and add a direct pointer from this
+ // machine instruction to the newly reduced one.
+ Node *mem1 = (Node*)1;
+ mach->add_req( ReduceInst( kid, newrule, mem1 ) );
+ }
+ }
+}
+
+
+// -------------------------------------------------------------------------
+// Java-Java calling convention
+// (what you use when Java calls Java)
+
+//------------------------------find_receiver----------------------------------
+// For a given signature, return the OptoReg for parameter 0.
+OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
+ VMRegPair regs;
+ BasicType sig_bt = T_OBJECT;
+ calling_convention(&sig_bt, &regs, 1, is_outgoing);
+ // Return argument 0 register. In the LP64 build pointers
+ // take 2 registers, but the VM wants only the 'main' name.
+ return OptoReg::as_OptoReg(regs.first());
+}
+
+// A method-klass-holder may be passed in the inline_cache_reg
+// and then expanded into the inline_cache_reg and a method_oop register
+// defined in ad_<arch>.cpp
+
+
+//------------------------------find_shared------------------------------------
+// Set bits if Node is shared or otherwise a root
+void Matcher::find_shared( Node *n ) {
+ // Allocate stack of size C->unique() * 2 to avoid frequent realloc
+ MStack mstack(C->unique() * 2);
+ mstack.push(n, Visit); // Don't need to pre-visit root node
+ while (mstack.is_nonempty()) {
+ n = mstack.node(); // Leave node on stack
+ Node_State nstate = mstack.state();
+ if (nstate == Pre_Visit) {
+ if (is_visited(n)) { // Visited already?
+ // Node is shared and has no reason to clone. Flag it as shared.
+ // This causes it to match into a register for the sharing.
+ set_shared(n); // Flag as shared and
+ mstack.pop(); // remove node from stack
+ continue;
+ }
+ nstate = Visit; // Not already visited; so visit now
+ }
+ if (nstate == Visit) {
+ mstack.set_state(Post_Visit);
+ set_visited(n); // Flag as visited now
+ bool mem_op = false;
+
+ switch( n->Opcode() ) { // Handle some opcodes special
+ case Op_Phi: // Treat Phis as shared roots
+ case Op_Parm:
+ case Op_Proj: // All handled specially during matching
+ set_shared(n);
+ set_dontcare(n);
+ break;
+ case Op_If:
+ case Op_CountedLoopEnd:
+ mstack.set_state(Alt_Post_Visit); // Alternative way
+ // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)). Helps
+ // with matching cmp/branch in 1 instruction. The Matcher needs the
+ // Bool and CmpX side-by-side, because it can only get at constants
+ // that are at the leaves of Match trees, and the Bool's condition acts
+ // as a constant here.
+ mstack.push(n->in(1), Visit); // Clone the Bool
+ mstack.push(n->in(0), Pre_Visit); // Visit control input
+ continue; // while (mstack.is_nonempty())
+ case Op_ConvI2D: // These forms efficiently match with a prior
+ case Op_ConvI2F: // Load but not a following Store
+ if( n->in(1)->is_Load() && // Prior load
+ n->outcnt() == 1 && // Not already shared
+ n->unique_out()->is_Store() ) // Following store
+ set_shared(n); // Force it to be a root
+ break;
+ case Op_ReverseBytesI:
+ case Op_ReverseBytesL:
+ if( n->in(1)->is_Load() && // Prior load
+ n->outcnt() == 1 ) // Not already shared
+ set_shared(n); // Force it to be a root
+ break;
+ case Op_BoxLock: // Can't match until we get stack-regs in ADLC
+ case Op_IfFalse:
+ case Op_IfTrue:
+ case Op_MachProj:
+ case Op_MergeMem:
+ case Op_Catch:
+ case Op_CatchProj:
+ case Op_CProj:
+ case Op_JumpProj:
+ case Op_JProj:
+ case Op_NeverBranch:
+ set_dontcare(n);
+ break;
+ case Op_Jump:
+ mstack.push(n->in(1), Visit); // Switch Value
+ mstack.push(n->in(0), Pre_Visit); // Visit Control input
+ continue; // while (mstack.is_nonempty())
+ case Op_StrComp:
+ set_shared(n); // Force result into register (it will be anyways)
+ break;
+ case Op_ConP: { // Convert pointers above the centerline to NULL
+ TypeNode *tn = n->as_Type(); // Constants derive from type nodes
+ const TypePtr* tp = tn->type()->is_ptr();
+ if (tp->_ptr == TypePtr::AnyNull) {
+ tn->set_type(TypePtr::NULL_PTR);
+ }
+ break;
+ }
+ case Op_Binary: // These are introduced in the Post_Visit state.
+ ShouldNotReachHere();
+ break;
+ case Op_StoreB: // Do match these, despite no ideal reg
+ case Op_StoreC:
+ case Op_StoreCM:
+ case Op_StoreD:
+ case Op_StoreF:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreP:
+ case Op_Store16B:
+ case Op_Store8B:
+ case Op_Store4B:
+ case Op_Store8C:
+ case Op_Store4C:
+ case Op_Store2C:
+ case Op_Store4I:
+ case Op_Store2I:
+ case Op_Store2L:
+ case Op_Store4F:
+ case Op_Store2F:
+ case Op_Store2D:
+ case Op_ClearArray:
+ case Op_SafePoint:
+ mem_op = true;
+ break;
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadD:
+ case Op_LoadF:
+ case Op_LoadI:
+ case Op_LoadKlass:
+ case Op_LoadL:
+ case Op_LoadS:
+ case Op_LoadP:
+ case Op_LoadRange:
+ case Op_LoadD_unaligned:
+ case Op_LoadL_unaligned:
+ case Op_Load16B:
+ case Op_Load8B:
+ case Op_Load4B:
+ case Op_Load4C:
+ case Op_Load2C:
+ case Op_Load8C:
+ case Op_Load8S:
+ case Op_Load4S:
+ case Op_Load2S:
+ case Op_Load4I:
+ case Op_Load2I:
+ case Op_Load2L:
+ case Op_Load4F:
+ case Op_Load2F:
+ case Op_Load2D:
+ mem_op = true;
+ // Must be root of match tree due to prior load conflict
+ if( C->subsume_loads() == false ) {
+ set_shared(n);
+ }
+ // Fall into default case
+ default:
+ if( !n->ideal_reg() )
+ set_dontcare(n); // Unmatchable Nodes
+ } // end_switch
+
+ for(int i = n->req() - 1; i >= 0; --i) { // For my children
+ Node *m = n->in(i); // Get ith input
+ if (m == NULL) continue; // Ignore NULLs
+ uint mop = m->Opcode();
+
+ // Must clone all producers of flags, or we will not match correctly.
+ // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
+ // then it will match into an ideal Op_RegFlags. Alas, the fp-flags
+ // are also there, so we may match a float-branch to int-flags and
+ // expect the allocator to haul the flags from the int-side to the
+ // fp-side. No can do.
+ if( _must_clone[mop] ) {
+ mstack.push(m, Visit);
+ continue; // for(int i = ...)
+ }
+
+ // Clone addressing expressions as they are "free" in most instructions
+ if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
+ Node *off = m->in(AddPNode::Offset);
+ if( off->is_Con() ) {
+ set_visited(m); // Flag as visited now
+ Node *adr = m->in(AddPNode::Address);
+
+ // Intel, ARM and friends can handle 2 adds in addressing mode
+ if( clone_shift_expressions && adr->Opcode() == Op_AddP &&
+ // AtomicAdd is not an addressing expression.
+ // Cheap to find it by looking for screwy base.
+ !adr->in(AddPNode::Base)->is_top() ) {
+ set_visited(adr); // Flag as visited now
+ Node *shift = adr->in(AddPNode::Offset);
+ // Check for shift by small constant as well
+ if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
+ shift->in(2)->get_int() <= 3 ) {
+ set_visited(shift); // Flag as visited now
+ mstack.push(shift->in(2), Visit);
+#ifdef _LP64
+ // Allow the Matcher to match the rule which bypasses the
+ // ConvI2L operation for an array index on LP64
+ // if the index value is positive.
+ if( shift->in(1)->Opcode() == Op_ConvI2L &&
+ shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
+ set_visited(shift->in(1)); // Flag as visited now
+ mstack.push(shift->in(1)->in(1), Pre_Visit);
+ } else
+#endif
+ mstack.push(shift->in(1), Pre_Visit);
+ } else {
+ mstack.push(shift, Pre_Visit);
+ }
+ mstack.push(adr->in(AddPNode::Address), Pre_Visit);
+ mstack.push(adr->in(AddPNode::Base), Pre_Visit);
+ } else { // Sparc, Alpha, PPC and friends
+ mstack.push(adr, Pre_Visit);
+ }
+
+ // Clone X+offset as it also folds into most addressing expressions
+ mstack.push(off, Visit);
+ mstack.push(m->in(AddPNode::Base), Pre_Visit);
+ continue; // for(int i = ...)
+ } // if( off->is_Con() )
+ } // if( mem_op &&
+ mstack.push(m, Pre_Visit);
+ } // for(int i = ...)
+ }
+ else if (nstate == Alt_Post_Visit) {
+ mstack.pop(); // Remove node from stack
+ // We cannot remove the Cmp input from the Bool here, as the Bool may be
+ // shared and all users of the Bool need to move the Cmp in parallel.
+ // This leaves both the Bool and the If pointing at the Cmp. To
+ // prevent the Matcher from trying to Match the Cmp along both paths
+ // BoolNode::match_edge always returns a zero.
+
+ // We reorder the Op_If in a pre-order manner, so we can visit without
+ // accidentally sharing the Cmp (the Bool and the If make 2 users).
+ n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
+ }
+ else if (nstate == Post_Visit) {
+ mstack.pop(); // Remove node from stack
+
+ // Now hack a few special opcodes
+ switch( n->Opcode() ) { // Handle some opcodes special
+ case Op_StorePConditional:
+ case Op_StoreLConditional:
+ case Op_CompareAndSwapI:
+ case Op_CompareAndSwapL:
+ case Op_CompareAndSwapP: { // Convert trinary to binary-tree
+ Node *newval = n->in(MemNode::ValueIn );
+ Node *oldval = n->in(LoadStoreNode::ExpectedIn);
+ Node *pair = new (C, 3) BinaryNode( oldval, newval );
+ n->set_req(MemNode::ValueIn,pair);
+ n->del_req(LoadStoreNode::ExpectedIn);
+ break;
+ }
+ case Op_CMoveD: // Convert trinary to binary-tree
+ case Op_CMoveF:
+ case Op_CMoveI:
+ case Op_CMoveL:
+ case Op_CMoveP: {
+ // Restructure into a binary tree for Matching. It's possible that
+ // we could move this code up next to the graph reshaping for IfNodes
+ // or vice-versa, but I do not want to debug this for Ladybird.
+ // 10/2/2000 CNC.
+ Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1));
+ n->set_req(1,pair1);
+ Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3));
+ n->set_req(2,pair2);
+ n->del_req(3);
+ break;
+ }
+ default:
+ break;
+ }
+ }
+ else {
+ ShouldNotReachHere();
+ }
+ } // end of while (mstack.is_nonempty())
+}
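+// Illustrative sketch, not part of the original sources: a node is "shared"
+// exactly when the depth-first walk above reaches it a second time, which is
+// why Pre_Visit only needs the _visited and _shared bits. Toy version:
+#if 0
+#include <vector>
+#include <cstddef>
+struct ToyNode { std::vector<ToyNode*> in; bool visited, shared; };
+static void toy_find_shared(ToyNode* root) {
+  std::vector<ToyNode*> stack(1, root);
+  while (!stack.empty()) {
+    ToyNode* n = stack.back(); stack.pop_back();
+    if (n->visited) { n->shared = true; continue; }     // second visit => shared
+    n->visited = true;
+    for (size_t i = 0; i < n->in.size(); i++)
+      if (n->in[i] != NULL) stack.push_back(n->in[i]);  // visit inputs
+  }
+}
+#endif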
+
+#ifdef ASSERT
+// machine-independent root to machine-dependent root
+void Matcher::dump_old2new_map() {
+ _old2new_map.dump();
+}
+#endif
+
+//---------------------------collect_null_checks-------------------------------
+// Find null checks in the ideal graph; write a machine-specific node for
+// it. Used by later implicit-null-check handling. Actually collects
+// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
+// value being tested.
+void Matcher::collect_null_checks( Node *proj ) {
+ Node *iff = proj->in(0);
+ if( iff->Opcode() == Op_If ) {
+ // During matching If's have Bool & Cmp side-by-side
+ BoolNode *b = iff->in(1)->as_Bool();
+ Node *cmp = iff->in(2);
+ if( cmp->Opcode() == Op_CmpP ) {
+ if( cmp->in(2)->bottom_type() == TypePtr::NULL_PTR ) {
+
+ if( proj->Opcode() == Op_IfTrue ) {
+ extern int all_null_checks_found;
+ all_null_checks_found++;
+ if( b->_test._test == BoolTest::ne ) {
+ _null_check_tests.push(proj);
+ _null_check_tests.push(cmp->in(1));
+ }
+ } else {
+ assert( proj->Opcode() == Op_IfFalse, "" );
+ if( b->_test._test == BoolTest::eq ) {
+ _null_check_tests.push(proj);
+ _null_check_tests.push(cmp->in(1));
+ }
+ }
+ }
+ }
+ }
+}
+
+//---------------------------validate_null_checks------------------------------
+// It's possible that the value being NULL-checked is not the root of a match
+// tree. If so, I cannot use the value in an implicit null check.
+void Matcher::validate_null_checks( ) {
+ uint cnt = _null_check_tests.size();
+ for( uint i=0; i < cnt; i+=2 ) {
+ Node *test = _null_check_tests[i];
+ Node *val = _null_check_tests[i+1];
+ if (has_new_node(val)) {
+ // Is a match-tree root, so replace with the matched value
+ _null_check_tests.map(i+1, new_node(val));
+ } else {
+ // Yank from candidate list
+ _null_check_tests.map(i+1,_null_check_tests[--cnt]);
+ _null_check_tests.map(i,_null_check_tests[--cnt]);
+ _null_check_tests.pop();
+ _null_check_tests.pop();
+ i-=2;
+ }
+ }
+}
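+// Illustrative sketch, not part of the original sources: validate_null_checks
+// deletes a (proj, value) pair by copying the last pair into its slot and
+// shrinking the list, then re-examining the same slot. With std::vector:
+#if 0
+#include <vector>
+static void remove_pair_at(std::vector<int>& v, int i) {
+  int cnt = (int)v.size();
+  v[i]     = v[cnt - 2];   // move last pair's first element down
+  v[i + 1] = v[cnt - 1];   // move last pair's second element down
+  v.pop_back();
+  v.pop_back();
+  // The caller must look at index i again; a different pair now lives there.
+}
+#endif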
+
+
+// Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock
+// acting as an Acquire and thus we don't need an Acquire here. We
+// retain the Node to act as a compiler ordering barrier.
+bool Matcher::prior_fast_lock( const Node *acq ) {
+ Node *r = acq->in(0);
+ if( !r->is_Region() || r->req() <= 1 ) return false;
+ Node *proj = r->in(1);
+ if( !proj->is_Proj() ) return false;
+ Node *call = proj->in(0);
+ if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() )
+ return false;
+
+ return true;
+}
+
+// Used by the DFA in dfa_sparc.cpp. Check for a following FastUnLock
+// acting as a Release and thus we don't need a Release here. We
+// retain the Node to act as a compiler ordering barrier.
+bool Matcher::post_fast_unlock( const Node *rel ) {
+ Compile *C = Compile::current();
+ assert( rel->Opcode() == Op_MemBarRelease, "" );
+ const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
+ DUIterator_Fast imax, i = mem->fast_outs(imax);
+ Node *ctrl = NULL;
+ while( true ) {
+ ctrl = mem->fast_out(i); // Throw out-of-bounds if proj not found
+ assert( ctrl->is_Proj(), "only projections here" );
+ ProjNode *proj = (ProjNode*)ctrl;
+ if( proj->_con == TypeFunc::Control &&
+ !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
+ break;
+ i++;
+ }
+ Node *iff = NULL;
+ for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
+ Node *x = ctrl->fast_out(j);
+ if( x->is_If() && x->req() > 1 &&
+ !C->node_arena()->contains(x) ) { // Unmatched old-space only
+ iff = x;
+ break;
+ }
+ }
+ if( !iff ) return false;
+ Node *bol = iff->in(1);
+ // The iff might be some random subclass of If or bol might be Con-Top
+ if (!bol->is_Bool()) return false;
+ assert( bol->req() > 1, "" );
+ return (bol->in(1)->Opcode() == Op_FastUnlock);
+}
+
+// Used by the DFA in dfa_xxx.cpp. Check for a following barrier or
+// atomic instruction acting as a store_load barrier without any
+// intervening volatile load, and thus we don't need a barrier here.
+// We retain the Node to act as a compiler ordering barrier.
+bool Matcher::post_store_load_barrier(const Node *vmb) {
+ Compile *C = Compile::current();
+ assert( vmb->is_MemBar(), "" );
+ assert( vmb->Opcode() != Op_MemBarAcquire, "" );
+ const MemBarNode *mem = (const MemBarNode*)vmb;
+
+ // Get the Proj node, ctrl, that can be used to iterate forward
+ Node *ctrl = NULL;
+ DUIterator_Fast imax, i = mem->fast_outs(imax);
+ while( true ) {
+ ctrl = mem->fast_out(i); // Throw out-of-bounds if proj not found
+ assert( ctrl->is_Proj(), "only projections here" );
+ ProjNode *proj = (ProjNode*)ctrl;
+ if( proj->_con == TypeFunc::Control &&
+ !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
+ break;
+ i++;
+ }
+
+ for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
+ Node *x = ctrl->fast_out(j);
+ int xop = x->Opcode();
+
+ // We don't need current barrier if we see another or a lock
+ // before seeing volatile load.
+ //
+ // Op_Fastunlock previously appeared in the Op_* list below.
+ // With the advent of 1-0 lock operations we're no longer guaranteed
+ // that a monitor exit operation contains a serializing instruction.
+
+ if (xop == Op_MemBarVolatile ||
+ xop == Op_FastLock ||
+ xop == Op_CompareAndSwapL ||
+ xop == Op_CompareAndSwapP ||
+ xop == Op_CompareAndSwapI)
+ return true;
+
+ if (x->is_MemBar()) {
+ // We must retain this membar if there is an upcoming volatile
+ // load, which will be preceded by acquire membar.
+ if (xop == Op_MemBarAcquire)
+ return false;
+ // For other kinds of barriers, check by pretending we
+ // are them, and seeing if we can be removed.
+ else
+ return post_store_load_barrier((const MemBarNode*)x);
+ }
+
+ // Delicate code to detect case of an upcoming fastlock block
+ if( x->is_If() && x->req() > 1 &&
+ !C->node_arena()->contains(x) ) { // Unmatched old-space only
+ Node *iff = x;
+ Node *bol = iff->in(1);
+ // The iff might be some random subclass of If or bol might be Con-Top
+ if (!bol->is_Bool()) return false;
+ assert( bol->req() > 1, "" );
+ return (bol->in(1)->Opcode() == Op_FastUnlock);
+ }
+ // probably not necessary to check for these
+ if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
+ return false;
+ }
+ return false;
+}
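+// Illustrative sketch, not part of the original sources: the scan above looks
+// at users of the barrier's control projection and declares the barrier
+// redundant only if another serializing node appears before any acquire.
+// Toy version over a flat list of successor opcodes:
+#if 0
+enum ToyOp { TOY_VOLATILE_MEMBAR, TOY_ATOMIC_RMW, TOY_ACQUIRE_MEMBAR, TOY_OTHER };
+static bool toy_barrier_is_redundant(const ToyOp* succ, int n) {
+  for (int i = 0; i < n; i++) {
+    if (succ[i] == TOY_VOLATILE_MEMBAR || succ[i] == TOY_ATOMIC_RMW)
+      return true;    // another store-load barrier follows anyway
+    if (succ[i] == TOY_ACQUIRE_MEMBAR)
+      return false;   // an upcoming volatile load still needs this barrier
+  }
+  return false;       // be conservative when nothing is found
+}
+#endif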
+
+//=============================================================================
+//---------------------------State---------------------------------------------
+State::State(void) {
+#ifdef ASSERT
+ _id = 0;
+ _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
+ _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
+ //memset(_cost, -1, sizeof(_cost));
+ //memset(_rule, -1, sizeof(_rule));
+#endif
+ memset(_valid, 0, sizeof(_valid));
+}
+
+#ifdef ASSERT
+State::~State() {
+ _id = 99;
+ _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
+ _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
+ memset(_cost, -3, sizeof(_cost));
+ memset(_rule, -3, sizeof(_rule));
+}
+#endif
+
+#ifndef PRODUCT
+//---------------------------dump----------------------------------------------
+void State::dump() {
+ tty->print("\n");
+ dump(0);
+}
+
+void State::dump(int depth) {
+ for( int j = 0; j < depth; j++ )
+ tty->print(" ");
+ tty->print("--N: ");
+ _leaf->dump();
+ uint i;
+ for( i = 0; i < _LAST_MACH_OPER; i++ )
+ // Check for valid entry
+ if( valid(i) ) {
+ for( int j = 0; j < depth; j++ )
+ tty->print(" ");
+ assert(_cost[i] != max_juint, "cost must be a valid value");
+ assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
+ tty->print_cr("%s %d %s",
+ ruleName[i], _cost[i], ruleName[_rule[i]] );
+ }
+ tty->print_cr("");
+
+ for( i=0; i<2; i++ )
+ if( _kids[i] )
+ _kids[i]->dump(depth+1);
+}
+#endif
diff --git a/src/share/vm/opto/matcher.hpp b/src/share/vm/opto/matcher.hpp
new file mode 100644
index 000000000..a33c4e92d
--- /dev/null
+++ b/src/share/vm/opto/matcher.hpp
@@ -0,0 +1,392 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+class Node;
+class MachNode;
+class MachTypeNode;
+class MachOper;
+
+//---------------------------Matcher-------------------------------------------
+class Matcher : public PhaseTransform {
+ friend class VMStructs;
+ // Private arena of State objects
+ ResourceArea _states_arena;
+
+ VectorSet _visited; // Visit bits
+
+ // Used to control the Label pass
+ VectorSet _shared; // Shared Ideal Node
+ VectorSet _dontcare; // Nothing the matcher cares about
+
+ // Private methods which perform the actual matching and reduction
+ // Walks the label tree, generating machine nodes
+ MachNode *ReduceInst( State *s, int rule, Node *&mem);
+ void ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach);
+ uint ReduceInst_Interior(State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds);
+ void ReduceOper( State *s, int newrule, Node *&mem, MachNode *mach );
+
+ // If this node already matched using "rule", return the MachNode for it.
+ MachNode* find_shared_constant(Node* con, uint rule);
+
+ // Convert a dense opcode number to an expanded rule number
+ const int *_reduceOp;
+ const int *_leftOp;
+ const int *_rightOp;
+
+ // Map dense opcode number to info on when rule is swallowed constant.
+ const bool *_swallowed;
+
+ // Map dense rule number to determine if this is an instruction chain rule
+ const uint _begin_inst_chain_rule;
+ const uint _end_inst_chain_rule;
+
+ // We want to clone constants and possible CmpI-variants.
+ // If we do not clone CmpI, then we can have many instances of
+ // condition codes alive at once. This is OK on some chips and
+ // bad on others. Hence the machine-dependent table lookup.
+ const char *_must_clone;
+
+ // Find shared Nodes, or Nodes that otherwise are Matcher roots
+ void find_shared( Node *n );
+
+ // Debug and profile information for nodes in old space:
+ GrowableArray<Node_Notes*>* _old_node_note_array;
+
+ // Node labeling iterator for instruction selection
+ Node *Label_Root( const Node *n, State *svec, Node *control, const Node *mem );
+
+ Node *transform( Node *dummy );
+
+ Node_List &_proj_list; // For Machine nodes killing many values
+
+ Node_Array _shared_constants;
+
+ debug_only(Node_Array _old2new_map;) // Map roots of ideal-trees to machine-roots
+
+ // Accessors for the inherited field PhaseTransform::_nodes:
+ void grow_new_node_array(uint idx_limit) {
+ _nodes.map(idx_limit-1, NULL);
+ }
+ bool has_new_node(const Node* n) const {
+ return _nodes.at(n->_idx) != NULL;
+ }
+ Node* new_node(const Node* n) const {
+ assert(has_new_node(n), "set before get");
+ return _nodes.at(n->_idx);
+ }
+ void set_new_node(const Node* n, Node *nn) {
+ assert(!has_new_node(n), "set only once");
+ _nodes.map(n->_idx, nn);
+ }
+
+#ifdef ASSERT
+ // Make sure only new nodes are reachable from this node
+ void verify_new_nodes_only(Node* root);
+#endif
+
+public:
+ int LabelRootDepth;
+ static const int base2reg[]; // Map Types to machine register types
+ // Convert ideal machine register to a register mask for spill-loads
+ static const RegMask *idealreg2regmask[];
+ RegMask *idealreg2spillmask[_last_machine_leaf];
+ RegMask *idealreg2debugmask[_last_machine_leaf];
+ void init_spill_mask( Node *ret );
+ // Convert machine register number to register mask
+ static uint mreg2regmask_max;
+ static RegMask mreg2regmask[];
+ static RegMask STACK_ONLY_mask;
+
+ bool is_shared( Node *n ) { return _shared.test(n->_idx) != 0; }
+ void set_shared( Node *n ) { _shared.set(n->_idx); }
+ bool is_visited( Node *n ) { return _visited.test(n->_idx) != 0; }
+ void set_visited( Node *n ) { _visited.set(n->_idx); }
+ bool is_dontcare( Node *n ) { return _dontcare.test(n->_idx) != 0; }
+ void set_dontcare( Node *n ) { _dontcare.set(n->_idx); }
+
+ // Mode bit to tell DFA and expand rules whether we are running after
+ // (or during) register selection. Usually, the matcher runs before,
+ // but it will also get called to generate post-allocation spill code.
+ // In this situation, it is a deadly error to attempt to allocate more
+ // temporary registers.
+ bool _allocation_started;
+
+ // Machine register names
+ static const char *regName[];
+ // Machine register encodings
+ static const unsigned char _regEncode[];
+ // Machine Node names
+ const char **_ruleName;
+ // Rules that are cheaper to rematerialize than to spill
+ static const uint _begin_rematerialize;
+ static const uint _end_rematerialize;
+
+ // An array of chars, from 0 to _last_Mach_Reg.
+ // No Save = 'N' (for register windows)
+ // Save on Entry = 'E'
+ // Save on Call = 'C'
+ // Always Save = 'A' (same as SOE + SOC)
+ const char *_register_save_policy;
+ const char *_c_reg_save_policy;
+ // Convert a machine register to a machine register type, so-as to
+ // properly match spill code.
+ const int *_register_save_type;
+ // Maps from machine register to boolean; true if machine register can
+ // be holding a call argument in some signature.
+ static bool can_be_java_arg( int reg );
+ // Maps from machine register to boolean; true if machine register holds
+ // a spillable argument.
+ static bool is_spillable_arg( int reg );
+
+ // List of IfFalse or IfTrue Nodes that indicate a taken null test.
+ // List is valid in the post-matching space.
+ Node_List _null_check_tests;
+ void collect_null_checks( Node *proj );
+ void validate_null_checks( );
+
+ Matcher( Node_List &proj_list );
+
+ // Select instructions for entire method
+ void match( );
+ // Helper for match
+ OptoReg::Name warp_incoming_stk_arg( VMReg reg );
+
+ // Transform, then walk. Does implicit DCE while walking.
+ // Name changed from "transform" to avoid it being virtual.
+ Node *xform( Node *old_space_node, int Nodes );
+
+ // Match a single Ideal Node - turn it into a 1-Node tree; Label & Reduce.
+ MachNode *match_tree( const Node *n );
+ MachNode *match_sfpt( SafePointNode *sfpt );
+ // Helper for match_sfpt
+ OptoReg::Name warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call );
+
+ // Initialize first stack mask and related masks.
+ void init_first_stack_mask();
+
+ // If we should save-on-entry this register
+ bool is_save_on_entry( int reg );
+
+ // Fixup the save-on-entry registers
+ void Fixup_Save_On_Entry( );
+
+ // --- Frame handling ---
+
+ // Register number of the stack slot corresponding to the incoming SP.
+ // Per the Big Picture in the AD file, it is:
+ // SharedInfo::stack0 + locks + in_preserve_stack_slots + pad2.
+ OptoReg::Name _old_SP;
+
+ // Register number of the stack slot corresponding to the highest incoming
+ // argument on the stack. Per the Big Picture in the AD file, it is:
+ // _old_SP + out_preserve_stack_slots + incoming argument size.
+ OptoReg::Name _in_arg_limit;
+
+ // Register number of the stack slot corresponding to the new SP.
+ // Per the Big Picture in the AD file, it is:
+ // _in_arg_limit + pad0
+ OptoReg::Name _new_SP;
+
+ // Register number of the stack slot corresponding to the highest outgoing
+ // argument on the stack. Per the Big Picture in the AD file, it is:
+ // _new_SP + max outgoing arguments of all calls
+ OptoReg::Name _out_arg_limit;
+
+ OptoRegPair *_parm_regs; // Array of machine registers per argument
+ RegMask *_calling_convention_mask; // Array of RegMasks per argument
+
+ // Does matcher support this ideal node?
+ static const bool has_match_rule(int opcode);
+ static const bool _hasMatchRule[_last_opcode];
+
+ // Used to determine if we have fast l2f conversion
+ // USII has it, USIII doesn't
+ static const bool convL2FSupported(void);
+
+ // Vector width in bytes
+ static const uint vector_width_in_bytes(void);
+
+ // Vector ideal reg
+ static const uint vector_ideal_reg(void);
+
+ // Used to determine a "low complexity" 64-bit constant. (Zero is simple.)
+ // The standard of comparison is one (StoreL ConL) vs. two (StoreI ConI).
+ // Depends on the details of 64-bit constant generation on the CPU.
+ static const bool isSimpleConstant64(jlong con);
+
+ // These calls are all generated by the ADLC
+
+ // TRUE - grows up, FALSE - grows down (Intel)
+ virtual bool stack_direction() const;
+
+ // Java-Java calling convention
+ // (what you use when Java calls Java)
+
+ // Alignment of stack in bytes, standard Intel word alignment is 4.
+ // Sparc probably wants at least double-word (8).
+ static uint stack_alignment_in_bytes();
+ // Alignment of stack, measured in stack slots.
+ // The size of stack slots is defined by VMRegImpl::stack_slot_size.
+ static uint stack_alignment_in_slots() {
+ return stack_alignment_in_bytes() / (VMRegImpl::stack_slot_size);
+ }
+
+ // Array mapping arguments to registers. Argument 0 is usually the 'this'
+ // pointer. Registers can include stack-slots and regular registers.
+ static void calling_convention( BasicType *, VMRegPair *, uint len, bool is_outgoing );
+
+ // Convert a sig into a calling convention register layout
+ // and find interesting things about it.
+ static OptoReg::Name find_receiver( bool is_outgoing );
+ // Return address register. On Intel it is a stack-slot. On PowerPC
+ // it is the Link register. On Sparc it is r31?
+ virtual OptoReg::Name return_addr() const;
+ RegMask _return_addr_mask;
+ // Return value register. On Intel it is EAX. On Sparc i0/o0.
+ static OptoRegPair return_value(int ideal_reg, bool is_outgoing);
+ static OptoRegPair c_return_value(int ideal_reg, bool is_outgoing);
+ RegMask _return_value_mask;
+ // Inline Cache Register
+ static OptoReg::Name inline_cache_reg();
+ static const RegMask &inline_cache_reg_mask();
+ static int inline_cache_reg_encode();
+
+ // Register for DIVI projection of divmodI
+ static RegMask divI_proj_mask();
+ // Register for MODI projection of divmodI
+ static RegMask modI_proj_mask();
+
+ // Register for DIVL projection of divmodL
+ static RegMask divL_proj_mask();
+ // Register for MODL projection of divmodL
+ static RegMask modL_proj_mask();
+
+ // Java-Interpreter calling convention
+ // (what you use when calling between compiled-Java and Interpreted-Java)
+
+ // Number of callee-save + always-save registers
+ // Ignores frame pointer and "special" registers
+ static int number_of_saved_registers();
+
+ // The Method-klass-holder may be passed in the inline_cache_reg
+ // and then expanded into the inline_cache_reg and a method_oop register
+
+ static OptoReg::Name interpreter_method_oop_reg();
+ static const RegMask &interpreter_method_oop_reg_mask();
+ static int interpreter_method_oop_reg_encode();
+
+ static OptoReg::Name compiler_method_oop_reg();
+ static const RegMask &compiler_method_oop_reg_mask();
+ static int compiler_method_oop_reg_encode();
+
+ // Interpreter's Frame Pointer Register
+ static OptoReg::Name interpreter_frame_pointer_reg();
+ static const RegMask &interpreter_frame_pointer_reg_mask();
+
+ // Java-Native calling convention
+ // (what you use when intercalling between Java and C++ code)
+
+ // Array mapping arguments to registers. Argument 0 is usually the 'this'
+ // pointer. Registers can include stack-slots and regular registers.
+ static void c_calling_convention( BasicType*, VMRegPair *, uint );
+ // Frame pointer. The frame pointer is kept at the base of the stack
+ // and so is probably the stack pointer for most machines. On Intel
+ // it is ESP. On the PowerPC it is R1. On Sparc it is SP.
+ OptoReg::Name c_frame_pointer() const;
+ static RegMask c_frame_ptr_mask;
+
+ // !!!!! Special stuff for building ScopeDescs
+ virtual int regnum_to_fpu_offset(int regnum);
+
+ // Is this branch offset small enough to be addressed by a short branch?
+ bool is_short_branch_offset(int offset);
+
+ // Optional scaling for the parameter to the ClearArray/CopyArray node.
+ static const bool init_array_count_is_in_bytes;
+
+ // Threshold small size (in bytes) for a ClearArray/CopyArray node.
+ // Anything this size or smaller may get converted to discrete scalar stores.
+ static const int init_array_short_size;
+
+ // Should the Matcher clone shifts on addressing modes, expecting them to
+ // be subsumed into complex addressing expressions or compute them into
+ // registers? True for Intel but false for most RISCs
+ static const bool clone_shift_expressions;
+
+ // Is it better to copy float constants, or load them directly from memory?
+ // Intel can load a float constant from a direct address, requiring no
+ // extra registers. Most RISCs will have to materialize an address into a
+ // register first, so they may as well materialize the constant immediately.
+ static const bool rematerialize_float_constants;
+
+ // If CPU can load and store mis-aligned doubles directly then no fixup is
+ // needed. Else we split the double into 2 integer pieces and move it
+ // piece-by-piece. Only happens when passing doubles into C code or when
+ // calling i2c adapters as the Java calling convention forces doubles to be
+ // aligned.
+ static const bool misaligned_doubles_ok;
+
+ // Perform a platform dependent implicit null fixup. This is needed
+ // on windows95 to take care of some unusual register constraints.
+ void pd_implicit_null_fixup(MachNode *load, uint idx);
+
+ // Advertise here if the CPU requires explicit rounding operations
+ // to implement the UseStrictFP mode.
+ static const bool strict_fp_requires_explicit_rounding;
+
+ // Do floats take an entire double register or just half?
+ static const bool float_in_double;
+ // Do ints take an entire long register or just half?
+ static const bool int_in_long;
+
+ // This routine is run whenever a graph fails to match.
+ // If it returns, the compiler should bailout to interpreter without error.
+ // In non-product mode, SoftMatchFailure is false to detect non-canonical
+ // graphs. Print a message and exit.
+ static void soft_match_failure() {
+ if( SoftMatchFailure ) return;
+ else { fatal("SoftMatchFailure is not allowed except in product"); }
+ }
+
+ // Used by the DFA in dfa_sparc.cpp. Check for a prior FastLock
+ // acting as an Acquire and thus we don't need an Acquire here. We
+ // retain the Node to act as a compiler ordering barrier.
+ static bool prior_fast_lock( const Node *acq );
+
+ // Used by the DFA in dfa_sparc.cpp. Check for a following
+ // FastUnLock acting as a Release and thus we don't need a Release
+ // here. We retain the Node to act as a compiler ordering barrier.
+ static bool post_fast_unlock( const Node *rel );
+
+ // Check for a following volatile memory barrier without an
+ // intervening load and thus we don't need a barrier here. We
+ // retain the Node to act as a compiler ordering barrier.
+ static bool post_store_load_barrier(const Node* mb);
+
+
+#ifdef ASSERT
+ void dump_old2new_map(); // machine-independent to machine-dependent
+#endif
+};
diff --git a/src/share/vm/opto/memnode.cpp b/src/share/vm/opto/memnode.cpp
new file mode 100644
index 000000000..26904be58
--- /dev/null
+++ b/src/share/vm/opto/memnode.cpp
@@ -0,0 +1,3222 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_memnode.cpp.incl"
+
+//=============================================================================
+uint MemNode::size_of() const { return sizeof(*this); }
+
+const TypePtr *MemNode::adr_type() const {
+ Node* adr = in(Address);
+ const TypePtr* cross_check = NULL;
+ DEBUG_ONLY(cross_check = _adr_type);
+ return calculate_adr_type(adr->bottom_type(), cross_check);
+}
+
+#ifndef PRODUCT
+void MemNode::dump_spec(outputStream *st) const {
+ if (in(Address) == NULL) return; // node is dead
+#ifndef ASSERT
+ // fake the missing field
+ const TypePtr* _adr_type = NULL;
+ if (in(Address) != NULL)
+ _adr_type = in(Address)->bottom_type()->isa_ptr();
+#endif
+ dump_adr_type(this, _adr_type, st);
+
+ Compile* C = Compile::current();
+ if( C->alias_type(_adr_type)->is_volatile() )
+ st->print(" Volatile!");
+}
+
+void MemNode::dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st) {
+ st->print(" @");
+ if (adr_type == NULL) {
+ st->print("NULL");
+ } else {
+ adr_type->dump_on(st);
+ Compile* C = Compile::current();
+ Compile::AliasType* atp = NULL;
+ if (C->have_alias_type(adr_type)) atp = C->alias_type(adr_type);
+ if (atp == NULL)
+ st->print(", idx=?\?;");
+ else if (atp->index() == Compile::AliasIdxBot)
+ st->print(", idx=Bot;");
+ else if (atp->index() == Compile::AliasIdxTop)
+ st->print(", idx=Top;");
+ else if (atp->index() == Compile::AliasIdxRaw)
+ st->print(", idx=Raw;");
+ else {
+ ciField* field = atp->field();
+ if (field) {
+ st->print(", name=");
+ field->print_name_on(st);
+ }
+ st->print(", idx=%d;", atp->index());
+ }
+ }
+}
+
+extern void print_alias_types();
+
+#endif
+
+//--------------------------Ideal_common---------------------------------------
+// Look for degenerate control and memory inputs. Bypass MergeMem inputs.
+// Unhook non-raw memories from complete (macro-expanded) initializations.
+Node *MemNode::Ideal_common(PhaseGVN *phase, bool can_reshape) {
+ // If our control input is a dead region, kill all below the region
+ Node *ctl = in(MemNode::Control);
+ if (ctl && remove_dead_region(phase, can_reshape))
+ return this;
+
+ // Ignore if memory is dead, or self-loop
+ Node *mem = in(MemNode::Memory);
+ if( phase->type( mem ) == Type::TOP ) return NodeSentinel; // caller will return NULL
+ assert( mem != this, "dead loop in MemNode::Ideal" );
+
+ Node *address = in(MemNode::Address);
+ const Type *t_adr = phase->type( address );
+ if( t_adr == Type::TOP ) return NodeSentinel; // caller will return NULL
+
+ // Avoid independent memory operations
+ Node* old_mem = mem;
+
+ if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* init = mem->in(0)->as_Initialize();
+ if (init->is_complete()) { // i.e., after macro expansion
+ const TypePtr* tp = t_adr->is_ptr();
+ uint alias_idx = phase->C->get_alias_index(tp);
+ // Free this slice from the init. It was hooked, temporarily,
+ // by GraphKit::set_output_for_allocation.
+ if (alias_idx > Compile::AliasIdxRaw) {
+ mem = init->memory(alias_idx);
+ // ...but not with the raw-pointer slice.
+ }
+ }
+ }
+
+ if (mem->is_MergeMem()) {
+ MergeMemNode* mmem = mem->as_MergeMem();
+ const TypePtr *tp = t_adr->is_ptr();
+ uint alias_idx = phase->C->get_alias_index(tp);
+#ifdef ASSERT
+ {
+ // Check that current type is consistent with the alias index used during graph construction
+ assert(alias_idx >= Compile::AliasIdxRaw, "must not be a bad alias_idx");
+ const TypePtr *adr_t = adr_type();
+ bool consistent = adr_t == NULL || adr_t->empty() || phase->C->must_alias(adr_t, alias_idx );
+ // Sometimes dead array references collapse to a[-1], a[-2], or a[-3]
+ if( !consistent && adr_t != NULL && !adr_t->empty() &&
+ tp->isa_aryptr() && tp->offset() == Type::OffsetBot &&
+ adr_t->isa_aryptr() && adr_t->offset() != Type::OffsetBot &&
+ ( adr_t->offset() == arrayOopDesc::length_offset_in_bytes() ||
+ adr_t->offset() == oopDesc::klass_offset_in_bytes() ||
+ adr_t->offset() == oopDesc::mark_offset_in_bytes() ) ) {
+ // don't assert if it is dead code.
+ consistent = true;
+ }
+ if( !consistent ) {
+ tty->print("alias_idx==%d, adr_type()==", alias_idx); if( adr_t == NULL ) { tty->print("NULL"); } else { adr_t->dump(); }
+ tty->cr();
+ print_alias_types();
+ assert(consistent, "adr_type must match alias idx");
+ }
+ }
+#endif
+ // TypeInstPtr::NOTNULL+any is an OOP with unknown offset - generally
+ // means an array I have not precisely typed yet. Do not do any
+ // alias stuff with it any time soon.
+ const TypeInstPtr *tinst = tp->isa_instptr();
+ if( tp->base() != Type::AnyPtr &&
+ !(tinst &&
+ tinst->klass()->is_java_lang_Object() &&
+ tinst->offset() == Type::OffsetBot) ) {
+ // compress paths and change unreachable cycles to TOP
+ // If not, we can update the input infinitely along a MergeMem cycle
+ // Equivalent code in PhiNode::Ideal
+ Node* m = phase->transform(mmem);
+ // If transformed to a MergeMem, get the desired slice
+ // Otherwise the returned node represents memory for every slice
+ mem = (m->is_MergeMem())? m->as_MergeMem()->memory_at(alias_idx) : m;
+ // Update input if it is progress over what we have now
+ }
+ }
+
+ if (mem != old_mem) {
+ set_req(MemNode::Memory, mem);
+ return this;
+ }
+
+ // let the subclass continue analyzing...
+ return NULL;
+}
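+
+// Roughly speaking (illustration only), the MergeMem bypass above replaces a
+// memory input of the form
+//   mem = MergeMem(<base_memory>, ..., slice_k, ...)
+// with slice_k, the single slice selected by this operation's alias index, so
+// the memory op no longer depends on unrelated memory slices.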
+
+// Helper function for proving some simple control dominations.
+// Attempt to prove that control input 'dom' dominates (or equals) 'sub'.
+// Already assumes that 'dom' is available at 'sub', and that 'sub'
+// is not a constant (dominated by the method's StartNode).
+// Used by MemNode::find_previous_store to prove that the
+// control input of a memory operation predates (dominates)
+// an allocation it wants to look past.
+bool MemNode::detect_dominating_control(Node* dom, Node* sub) {
+ if (dom == NULL) return false;
+ if (dom->is_Proj()) dom = dom->in(0);
+ if (dom->is_Start()) return true; // anything inside the method
+ if (dom->is_Root()) return true; // dom 'controls' a constant
+ int cnt = 20; // detect cycle or too much effort
+ while (sub != NULL) { // walk 'sub' up the chain to 'dom'
+ if (--cnt < 0) return false; // in a cycle or too complex
+ if (sub == dom) return true;
+ if (sub->is_Start()) return false;
+ if (sub->is_Root()) return false;
+ Node* up = sub->in(0);
+ if (sub == up && sub->is_Region()) {
+ for (uint i = 1; i < sub->req(); i++) {
+ Node* in = sub->in(i);
+ if (in != NULL && !in->is_top() && in != sub) {
+ up = in; break; // take any path on the way up to 'dom'
+ }
+ }
+ }
+ if (sub == up) return false; // some kind of tight cycle
+ sub = up;
+ }
+ return false;
+}
+
+//---------------------detect_ptr_independence---------------------------------
+// Used by MemNode::find_previous_store to prove that two base
+// pointers are never equal.
+// The pointers are accompanied by their associated allocations,
+// if any, which have been previously discovered by the caller.
+bool MemNode::detect_ptr_independence(Node* p1, AllocateNode* a1,
+ Node* p2, AllocateNode* a2,
+ PhaseTransform* phase) {
+ // Attempt to prove that these two pointers cannot be aliased.
+ // They may both manifestly be allocations, and they should differ.
+ // Or, if they are not both allocations, they can be distinct constants.
+ // Otherwise, one is an allocation and the other a pre-existing value.
+ if (a1 == NULL && a2 == NULL) { // neither an allocation
+ return (p1 != p2) && p1->is_Con() && p2->is_Con();
+ } else if (a1 != NULL && a2 != NULL) { // both allocations
+ return (a1 != a2);
+ } else if (a1 != NULL) { // one allocation a1
+ // (Note: p2->is_Con implies p2->in(0)->is_Root, which dominates.)
+ return detect_dominating_control(p2->in(0), a1->in(0));
+ } else { //(a2 != NULL) // one allocation a2
+ return detect_dominating_control(p1->in(0), a2->in(0));
+ }
+ return false;
+}
+
+
+// The logic for reordering loads and stores uses four steps:
+// (a) Walk carefully past stores and initializations which we
+// can prove are independent of this load.
+// (b) Observe that the next memory state makes an exact match
+// with self (load or store), and locate the relevant store.
+// (c) Ensure that, if we were to wire self directly to the store,
+// the optimizer would fold it up somehow.
+// (d) Do the rewiring, and return, depending on some other part of
+// the optimizer to fold up the load.
+// This routine handles steps (a) and (b). Steps (c) and (d) are
+// specific to loads and stores, so they are handled by the callers.
+// (Currently, only LoadNode::Ideal has steps (c), (d). More later.)
+//
+Node* MemNode::find_previous_store(PhaseTransform* phase) {
+ Node* ctrl = in(MemNode::Control);
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(base, phase);
+
+ if (offset == Type::OffsetBot)
+ return NULL; // cannot unalias unless there are precise offsets
+
+ intptr_t size_in_bytes = memory_size();
+
+ Node* mem = in(MemNode::Memory); // start searching here...
+
+ int cnt = 50; // Cycle limiter
+ for (;;) { // While we can dance past unrelated stores...
+ if (--cnt < 0) break; // Caught in cycle or a complicated dance?
+
+ if (mem->is_Store()) {
+ Node* st_adr = mem->in(MemNode::Address);
+ intptr_t st_offset = 0;
+ Node* st_base = AddPNode::Ideal_base_and_offset(st_adr, phase, st_offset);
+ if (st_base == NULL)
+ break; // inscrutable pointer
+ if (st_offset != offset && st_offset != Type::OffsetBot) {
+ const int MAX_STORE = BytesPerLong;
+ if (st_offset >= offset + size_in_bytes ||
+ st_offset <= offset - MAX_STORE ||
+ st_offset <= offset - mem->as_Store()->memory_size()) {
+ // Success: The offsets are provably independent.
+ // (You may ask, why not just test st_offset != offset and be done?
+ // The answer is that stores of different sizes can co-exist
+ // in the same sequence of RawMem effects. We sometimes initialize
+ // a whole 'tile' of array elements with a single jint or jlong.)
+ mem = mem->in(MemNode::Memory);
+ continue; // (a) advance through independent store memory
+ }
+ }
+ if (st_base != base &&
+ detect_ptr_independence(base, alloc,
+ st_base,
+ AllocateNode::Ideal_allocation(st_base, phase),
+ phase)) {
+ // Success: The bases are provably independent.
+ mem = mem->in(MemNode::Memory);
+ continue; // (a) advance through independent store memory
+ }
+
+ // (b) At this point, if the bases or offsets do not agree, we lose,
+ // since we have not managed to prove 'this' and 'mem' independent.
+ if (st_base == base && st_offset == offset) {
+ return mem; // let caller handle steps (c), (d)
+ }
+
+ } else if (mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* st_init = mem->in(0)->as_Initialize();
+ AllocateNode* st_alloc = st_init->allocation();
+ if (st_alloc == NULL)
+ break; // something degenerated
+ bool known_identical = false;
+ bool known_independent = false;
+ if (alloc == st_alloc)
+ known_identical = true;
+ else if (alloc != NULL)
+ known_independent = true;
+ else if (ctrl != NULL &&
+ detect_dominating_control(ctrl, st_alloc->in(0)))
+ known_independent = true;
+
+ if (known_independent) {
+ // The bases are provably independent: Either they are
+ // manifestly distinct allocations, or else the control
+ // of this load dominates the store's allocation.
+ int alias_idx = phase->C->get_alias_index(adr_type());
+ if (alias_idx == Compile::AliasIdxRaw) {
+ mem = st_alloc->in(TypeFunc::Memory);
+ } else {
+ mem = st_init->memory(alias_idx);
+ }
+ continue; // (a) advance through independent store memory
+ }
+
+ // (b) at this point, if we are not looking at a store initializing
+ // the same allocation we are loading from, we lose.
+ if (known_identical) {
+ // From caller, can_see_stored_value will consult find_captured_store.
+ return mem; // let caller handle steps (c), (d)
+ }
+
+ }
+
+ // Unless there is an explicit 'continue', we must bail out here,
+ // because 'mem' is an inscrutable memory state (e.g., a call).
+ break;
+ }
+
+ return NULL; // bail out
+}
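+
+// Illustrative usage sketch of steps (c) and (d), mirroring LoadNode::Ideal below:
+//
+//   Node* prev_mem = find_previous_store(phase);           // steps (a), (b)
+//   if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
+//     if (can_see_stored_value(prev_mem, phase)) {         // step (c): will fold
+//       set_req(MemNode::Memory, prev_mem);                // step (d): rewire
+//     }
+//   }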
+
+//----------------------calculate_adr_type-------------------------------------
+// Helper function. Notices when the given type of address hits top or bottom.
+// Also, asserts a cross-check of the type against the expected address type.
+const TypePtr* MemNode::calculate_adr_type(const Type* t, const TypePtr* cross_check) {
+ if (t == Type::TOP) return NULL; // does not touch memory any more?
+ #ifdef PRODUCT
+ cross_check = NULL;
+ #else
+ if (!VerifyAliases || is_error_reported() || Node::in_dump()) cross_check = NULL;
+ #endif
+ const TypePtr* tp = t->isa_ptr();
+ if (tp == NULL) {
+ assert(cross_check == NULL || cross_check == TypePtr::BOTTOM, "expected memory type must be wide");
+ return TypePtr::BOTTOM; // touches lots of memory
+ } else {
+ #ifdef ASSERT
+ // %%%% [phh] We don't check the alias index if cross_check is
+ // TypeRawPtr::BOTTOM. Needs to be investigated.
+ if (cross_check != NULL &&
+ cross_check != TypePtr::BOTTOM &&
+ cross_check != TypeRawPtr::BOTTOM) {
+ // Recheck the alias index, to see if it has changed (due to a bug).
+ Compile* C = Compile::current();
+ assert(C->get_alias_index(cross_check) == C->get_alias_index(tp),
+ "must stay in the original alias category");
+ // The type of the address must be contained in the adr_type,
+ // disregarding "null"-ness.
+ // (We make an exception for TypeRawPtr::BOTTOM, which is a bit bucket.)
+ const TypePtr* tp_notnull = tp->join(TypePtr::NOTNULL)->is_ptr();
+ assert(cross_check->meet(tp_notnull) == cross_check,
+ "real address must not escape from expected memory type");
+ }
+ #endif
+ return tp;
+ }
+}
+
+//------------------------adr_phi_is_loop_invariant----------------------------
+// A helper function for Ideal_DU_postCCP to check if a Phi in a counted
+// loop is loop invariant. Make a quick traversal of Phi and associated
+// CastPP nodes, looking to see if they are a closed group within the loop.
+bool MemNode::adr_phi_is_loop_invariant(Node* adr_phi, Node* cast) {
+ // The idea is that the phi-nest must boil down to only CastPP nodes
+ // with the same data. This implies that any path into the loop already
+ // includes such a CastPP, and so the original cast, whatever its input,
+ // must be covered by an equivalent cast, with an earlier control input.
+ ResourceMark rm;
+
+ // The loop entry input of the phi should be the unique dominating
+ // node for every Phi/CastPP in the loop.
+ Unique_Node_List closure;
+ closure.push(adr_phi->in(LoopNode::EntryControl));
+
+ // Add the phi node and the cast to the worklist.
+ Unique_Node_List worklist;
+ worklist.push(adr_phi);
+ if( cast != NULL ){
+ if( !cast->is_ConstraintCast() ) return false;
+ worklist.push(cast);
+ }
+
+ // Begin recursive walk of phi nodes.
+ while( worklist.size() ){
+ // Take a node off the worklist
+ Node *n = worklist.pop();
+ if( !closure.member(n) ){
+ // Add it to the closure.
+ closure.push(n);
+ // Make a sanity check to ensure we don't waste too much time here.
+ if( closure.size() > 20) return false;
+ // This node is OK if:
+ // - it is a cast of an identical value
+ // - or it is a phi node (then we add its inputs to the worklist)
+ // Otherwise, the node is not OK, and we presume the cast is not invariant
+ if( n->is_ConstraintCast() ){
+ worklist.push(n->in(1));
+ } else if( n->is_Phi() ) {
+ for( uint i = 1; i < n->req(); i++ ) {
+ worklist.push(n->in(i));
+ }
+ } else {
+ return false;
+ }
+ }
+ }
+
+ // Quit when the worklist is empty, and we've found no offending nodes.
+ return true;
+}
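+
+// A shape the walk above accepts (rough illustration):
+//   adr_phi = Phi(loop, entry_adr, CastPP(ctrl, adr_phi))
+// The in-loop path reaches the phi only through a CastPP of the same value,
+// so the address boils down to the loop-invariant entry_adr.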
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Find any cast-away of null-ness and keep its control. Null cast-aways are
+// going away in this pass and we need to make this memory op depend on the
+// gating null check.
+
+// I tried to leave the CastPP's in. This makes the graph more accurate in
+// some sense; we get to keep around the knowledge that an oop is not-null
+// after some test. Alas, the CastPP's interfere with GVN (some values are
+// the regular oop, some are the CastPP of the oop, all merge at Phi's which
+// cannot collapse, etc). This cost us 10% on SpecJVM, even when I removed
+// some of the more trivial cases in the optimizer. Removing more useless
+// Phi's started allowing Loads to illegally float above null checks. I gave
+// up on this approach. CNC 10/20/2000
+Node *MemNode::Ideal_DU_postCCP( PhaseCCP *ccp ) {
+ Node *ctr = in(MemNode::Control);
+ Node *mem = in(MemNode::Memory);
+ Node *adr = in(MemNode::Address);
+ Node *skipped_cast = NULL;
+ // Need a null check? Regular static accesses do not because they are
+ // from constant addresses. Array ops are gated by the range check (which
+ // always includes a NULL check). Just check field ops.
+ if( !ctr ) {
+ // Scan upwards for the highest location we can place this memory op.
+ while( true ) {
+ switch( adr->Opcode() ) {
+
+ case Op_AddP: // No change to NULL-ness, so peek thru AddP's
+ adr = adr->in(AddPNode::Base);
+ continue;
+
+ case Op_CastPP:
+ // If the CastPP is useless, just peek on through it.
+ if( ccp->type(adr) == ccp->type(adr->in(1)) ) {
+ // Remember the cast that we've peeked through. If we peek
+ // through more than one, then we end up remembering the highest
+ // one, that is, if in a loop, the one closest to the top.
+ skipped_cast = adr;
+ adr = adr->in(1);
+ continue;
+ }
+ // CastPP is going away in this pass! We need this memory op to be
+ // control-dependent on the test that is guarding the CastPP.
+ ccp->hash_delete(this);
+ set_req(MemNode::Control, adr->in(0));
+ ccp->hash_insert(this);
+ return this;
+
+ case Op_Phi:
+ // Attempt to float above a Phi to some dominating point.
+ if (adr->in(0) != NULL && adr->in(0)->is_CountedLoop()) {
+ // If we've already peeked through a Cast (which could have set the
+ // control), we can't float above a Phi, because the skipped Cast
+ // may not be loop invariant.
+ if (adr_phi_is_loop_invariant(adr, skipped_cast)) {
+ adr = adr->in(1);
+ continue;
+ }
+ }
+
+ // Intentional fallthrough!
+
+ // No obvious dominating point. The mem op is pinned below the Phi
+ // by the Phi itself. If the Phi goes away (no true value is merged)
+ // then the mem op can float, but not indefinitely. It must be pinned
+ // behind the controls leading to the Phi.
+ case Op_CheckCastPP:
+ // These usually stick around to change address type; however, a
+ // useless one can be elided, and we still need to pick up a control edge.
+ if (adr->in(0) == NULL) {
+ // This CheckCastPP node has NO control and is likely useless. But we
+ // need to check further up the ancestor chain for a control input to keep
+ // the node in place. 4959717.
+ skipped_cast = adr;
+ adr = adr->in(1);
+ continue;
+ }
+ ccp->hash_delete(this);
+ set_req(MemNode::Control, adr->in(0));
+ ccp->hash_insert(this);
+ return this;
+
+ // List of "safe" opcodes; those that implicitly block the memory
+ // op below any null check.
+ case Op_CastX2P: // no null checks on native pointers
+ case Op_Parm: // 'this' pointer is not null
+ case Op_LoadP: // Loading from within a klass
+ case Op_LoadKlass: // Loading from within a klass
+ case Op_ConP: // Loading from a klass
+ case Op_CreateEx: // Sucking up the guts of an exception oop
+ case Op_Con: // Reading from TLS
+ case Op_CMoveP: // CMoveP is pinned
+ break; // No progress
+
+ case Op_Proj: // Direct call to an allocation routine
+ case Op_SCMemProj: // Memory state from store conditional ops
+#ifdef ASSERT
+ {
+ assert(adr->as_Proj()->_con == TypeFunc::Parms, "must be return value");
+ const Node* call = adr->in(0);
+ if (call->is_CallStaticJava()) {
+ const CallStaticJavaNode* call_java = call->as_CallStaticJava();
+ assert(call_java && call_java->method() == NULL, "must be runtime call");
+ // We further presume that this is one of
+ // new_instance_Java, new_array_Java, or
+ // the like, but do not assert for this.
+ } else if (call->is_Allocate()) {
+ // similar case to new_instance_Java, etc.
+ } else if (!call->is_CallLeaf()) {
+ // Projections from fetch_oop (OSR) are allowed as well.
+ ShouldNotReachHere();
+ }
+ }
+#endif
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ break;
+ }
+ }
+
+ return NULL; // No progress
+}
+
+
+//=============================================================================
+uint LoadNode::size_of() const { return sizeof(*this); }
+uint LoadNode::cmp( const Node &n ) const
+{ return !Type::cmp( _type, ((LoadNode&)n)._type ); }
+const Type *LoadNode::bottom_type() const { return _type; }
+uint LoadNode::ideal_reg() const {
+ return Matcher::base2reg[_type->base()];
+}
+
+#ifndef PRODUCT
+void LoadNode::dump_spec(outputStream *st) const {
+ MemNode::dump_spec(st);
+ if( !Verbose && !WizardMode ) {
+ // standard dump does this in Verbose and WizardMode
+ st->print(" #"); _type->dump_on(st);
+ }
+}
+#endif
+
+
+//----------------------------LoadNode::make-----------------------------------
+// Polymorphic factory method:
+LoadNode *LoadNode::make( Compile *C, Node *ctl, Node *mem, Node *adr, const TypePtr* adr_type, const Type *rt, BasicType bt ) {
+ // sanity check the alias category against the created node type
+ assert(!(adr_type->isa_oopptr() &&
+ adr_type->offset() == oopDesc::klass_offset_in_bytes()),
+ "use LoadKlassNode instead");
+ assert(!(adr_type->isa_aryptr() &&
+ adr_type->offset() == arrayOopDesc::length_offset_in_bytes()),
+ "use LoadRangeNode instead");
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return new (C, 3) LoadBNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_INT: return new (C, 3) LoadINode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_CHAR: return new (C, 3) LoadCNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_SHORT: return new (C, 3) LoadSNode(ctl, mem, adr, adr_type, rt->is_int() );
+ case T_LONG: return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long() );
+ case T_FLOAT: return new (C, 3) LoadFNode(ctl, mem, adr, adr_type, rt );
+ case T_DOUBLE: return new (C, 3) LoadDNode(ctl, mem, adr, adr_type, rt );
+ case T_ADDRESS: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_ptr() );
+ case T_OBJECT: return new (C, 3) LoadPNode(ctl, mem, adr, adr_type, rt->is_oopptr());
+ }
+ ShouldNotReachHere();
+ return (LoadNode*)NULL;
+}
+
+LoadLNode* LoadLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt) {
+ bool require_atomic = true;
+ return new (C, 3) LoadLNode(ctl, mem, adr, adr_type, rt->is_long(), require_atomic);
+}
+
+//------------------------------hash-------------------------------------------
+uint LoadNode::hash() const {
+ // unroll addition of interesting fields
+ return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address);
+}
+
+//---------------------------can_see_stored_value------------------------------
+// This routine exists to make sure this set of tests is done the same
+// everywhere. We need to make a coordinated change: first LoadNode::Ideal
+// will change the graph shape in a way which makes memory alive twice at the
+// same time (uses the Oracle model of aliasing), then some
+// LoadXNode::Identity will fold things back to the equivalence-class model
+// of aliasing.
+Node* MemNode::can_see_stored_value(Node* st, PhaseTransform* phase) const {
+ Node* ld_adr = in(MemNode::Address);
+
+ // Loop around twice in the case Load -> Initialize -> Store.
+ // (See PhaseIterGVN::add_users_to_worklist, which knows about this case.)
+ for (int trip = 0; trip <= 1; trip++) {
+
+ if (st->is_Store()) {
+ Node* st_adr = st->in(MemNode::Address);
+ if (!phase->eqv(st_adr, ld_adr)) {
+ // Try harder before giving up... Match raw and non-raw pointers.
+ intptr_t st_off = 0;
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(st_adr, phase, st_off);
+ if (alloc == NULL) return NULL;
+ intptr_t ld_off = 0;
+ AllocateNode* allo2 = AllocateNode::Ideal_allocation(ld_adr, phase, ld_off);
+ if (alloc != allo2) return NULL;
+ if (ld_off != st_off) return NULL;
+ // At this point we have proven something like this setup:
+ // A = Allocate(...)
+ // L = LoadQ(, AddP(CastPP(, A.Parm),, #Off))
+ // S = StoreQ(, AddP(, A.Parm , #Off), V)
+ // (Actually, we haven't yet proven the Q's are the same.)
+ // In other words, we are loading from a casted version of
+ // the same pointer-and-offset that we stored to.
+ // Thus, we are able to replace L by V.
+ }
+ // Now prove that we have a LoadQ matched to a StoreQ, for some Q.
+ if (store_Opcode() != st->Opcode())
+ return NULL;
+ return st->in(MemNode::ValueIn);
+ }
+
+ intptr_t offset = 0; // scratch
+
+ // A load from a freshly-created object always returns zero.
+ // (This can happen after LoadNode::Ideal resets the load's memory input
+ // to the result of find_captured_store, which returned InitializeNode::zero_memory.)
+ if (st->is_Proj() && st->in(0)->is_Allocate() &&
+ st->in(0) == AllocateNode::Ideal_allocation(ld_adr, phase, offset) &&
+ offset >= st->in(0)->as_Allocate()->minimum_header_size()) {
+ // return a zero value for the load's basic type
+ // (This is one of the few places where a generic PhaseTransform
+ // can create new nodes. Think of it as lazily manifesting
+ // virtually pre-existing constants.)
+ return phase->zerocon(memory_type());
+ }
+
+ // A load from an initialization barrier can match a captured store.
+ if (st->is_Proj() && st->in(0)->is_Initialize()) {
+ InitializeNode* init = st->in(0)->as_Initialize();
+ AllocateNode* alloc = init->allocation();
+ if (alloc != NULL &&
+ alloc == AllocateNode::Ideal_allocation(ld_adr, phase, offset)) {
+ // examine a captured store value
+ st = init->find_captured_store(offset, memory_size(), phase);
+ if (st != NULL)
+ continue; // take one more trip around
+ }
+ }
+
+ break;
+ }
+
+ return NULL;
+}
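+
+// The fold this enables, sketched (illustration only):
+//   mem = StoreI(ctl, mem0, adr, v)
+//   ld  = LoadI (ctl, mem,  adr)
+// can_see_stored_value(mem) returns v, and LoadNode::Identity (below) then
+// replaces ld by v.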
+
+//------------------------------Identity---------------------------------------
+// Loads are identity if previous store is to same address
+Node *LoadNode::Identity( PhaseTransform *phase ) {
+ // If the previous store-maker is the right kind of Store, and the store is
+ // to the same address, then we are equal to the value stored.
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem, phase);
+ if( value ) {
+ // byte, short & char stores truncate naturally.
+ // A load has to load the truncated value which requires
+ // some sort of masking operation and that requires an
+ // Ideal call instead of an Identity call.
+ if (memory_size() < BytesPerInt) {
+ // If the input to the store does not fit with the load's result type,
+ // it must be truncated via an Ideal call.
+ if (!phase->type(value)->higher_equal(phase->type(this)))
+ return this;
+ }
+ // (This works even when value is a Con, but LoadNode::Value
+ // usually runs first, producing the singleton type of the Con.)
+ return value;
+ }
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the load is from Field memory and the pointer is non-null, we can
+// zero out the control input.
+// If the offset is constant and the base is an object allocation,
+// try to hook me up to the exact initializing store.
+Node *LoadNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* p = MemNode::Ideal_common(phase, can_reshape);
+ if (p) return (p == NodeSentinel) ? NULL : p;
+
+ Node* ctrl = in(MemNode::Control);
+ Node* address = in(MemNode::Address);
+
+ // Skip up past a SafePoint control. Cannot do this for Stores because
+ // pointer stores & cardmarks must stay on the same side of a SafePoint.
+ if( ctrl != NULL && ctrl->Opcode() == Op_SafePoint &&
+ phase->C->get_alias_index(phase->type(address)->is_ptr()) != Compile::AliasIdxRaw ) {
+ ctrl = ctrl->in(0);
+ set_req(MemNode::Control,ctrl);
+ }
+
+ // Check for useless control edge in some common special cases
+ if (in(MemNode::Control) != NULL) {
+ intptr_t ignore = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(address, phase, ignore);
+ if (base != NULL
+ && phase->type(base)->higher_equal(TypePtr::NOTNULL)
+ && detect_dominating_control(base->in(0), phase->C->start())) {
+ // A method-invariant, non-null address (constant or 'this' argument).
+ set_req(MemNode::Control, NULL);
+ }
+ }
+
+ // Check for prior store with a different base or offset; make Load
+ // independent. Skip through any number of them. Bail out if the stores
+ // are in an endless dead cycle and report no progress. This is a key
+ // transform for Reflection. However, if, after skipping through the Stores,
+ // we can't then fold up against a prior store, do NOT do the transform, as
+ // this amounts to using the 'Oracle' model of aliasing. It leaves the same
+ // array memory alive twice: once for the hoisted Load and again after the
+ // bypassed Store. This situation only works if EVERYBODY who does
+ // anti-dependence work knows how to bypass. I.e. we need all
+ // anti-dependence checks to ask the same Oracle. Right now, that Oracle is
+ // the alias index stuff. So instead, peek through Stores and IFF we can
+ // fold up, do so.
+ Node* prev_mem = find_previous_store(phase);
+ // Steps (a), (b): Walk past independent stores to find an exact match.
+ if (prev_mem != NULL && prev_mem != in(MemNode::Memory)) {
+ // (c) See if we can fold up on the spot, but don't fold up here.
+ // Fold-up might require truncation (for LoadB/LoadS/LoadC) or
+ // just return a prior value, which is done by Identity calls.
+ if (can_see_stored_value(prev_mem, phase)) {
+ // Make ready for step (d):
+ set_req(MemNode::Memory, prev_mem);
+ return this;
+ }
+ }
+
+ return NULL; // No further progress
+}
+
+// Helper to recognize certain Klass fields which are invariant across
+// some group of array types (e.g., int[] or all T[] where T < Object).
+const Type*
+LoadNode::load_array_final_field(const TypeKlassPtr *tkls,
+ ciKlass* klass) const {
+ if (tkls->offset() == Klass::modifier_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_modifier_flags. Return its (constant) value.
+ // (Folds up the 2nd indirection in aClassConstant.getModifiers().)
+ assert(this->Opcode() == Op_LoadI, "must load an int from _modifier_flags");
+ return TypeInt::make(klass->modifier_flags());
+ }
+ if (tkls->offset() == Klass::access_flags_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_access_flags. Return its (constant) value.
+ // (Folds up the 2nd indirection in Reflection.getClassAccessFlags(aClassConstant).)
+ assert(this->Opcode() == Op_LoadI, "must load an int from _access_flags");
+ return TypeInt::make(klass->access_flags());
+ }
+ if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_layout_helper. Return its constant value if known.
+ assert(this->Opcode() == Op_LoadI, "must load an int from _layout_helper");
+ return TypeInt::make(klass->layout_helper());
+ }
+
+ // No match.
+ return NULL;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *LoadNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ Node* mem = in(MemNode::Memory);
+ const Type *t1 = phase->type(mem);
+ if (t1 == Type::TOP) return Type::TOP;
+ Node* adr = in(MemNode::Address);
+ const TypePtr* tp = phase->type(adr)->isa_ptr();
+ if (tp == NULL || tp->empty()) return Type::TOP;
+ int off = tp->offset();
+ assert(off != Type::OffsetTop, "case covered by TypePtr::empty");
+
+ // Try to guess loaded type from pointer type
+ if (tp->base() == Type::AryPtr) {
+ const Type *t = tp->is_aryptr()->elem();
+ // Don't do this for integer types. There is only potential profit if
+ // the element type t is lower than _type; that is, for int types, if _type is
+ // more restrictive than t. This only happens here if one is short and the other
+ // char (both 16 bits), and in those cases we've made an intentional decision
+ // to use one kind of load over the other. See AndINode::Ideal and 4965907.
+ // Also, do not try to narrow the type for a LoadKlass, regardless of offset.
+ //
+ // Yes, it is possible to encounter an expression like (LoadKlass p1:(AddP x x 8))
+ // where the _gvn.type of the AddP is wider than 8. This occurs when an earlier
+ // copy p0 of (AddP x x 8) has been proven equal to p1, and the p0 has been
+ // subsumed by p1. If p1 is on the worklist but has not yet been re-transformed,
+ // it is possible that p1 will have a type like Foo*[int+]:NotNull*+any.
+ // In fact, that could have been the original type of p1, and p1 could have
+ // had an original form like p1:(AddP x x (LShiftL quux 3)), where the
+ // expression (LShiftL quux 3) independently optimized to the constant 8.
+ if ((t->isa_int() == NULL) && (t->isa_long() == NULL)
+ && Opcode() != Op_LoadKlass) {
+ // t might actually be lower than _type, if _type is a unique
+ // concrete subclass of abstract class t.
+ // Make sure the reference is not into the header, by comparing
+ // the offset against the offset of the start of the array's data.
+ // Different array types begin at slightly different offsets (12 vs. 16).
+ // We choose T_BYTE as an example base type that is least restrictive
+ // as to alignment, which will therefore produce the smallest
+ // possible base offset.
+ const int min_base_off = arrayOopDesc::base_offset_in_bytes(T_BYTE);
+ if ((uint)off >= (uint)min_base_off) { // is the offset beyond the header?
+ const Type* jt = t->join(_type);
+ // In any case, do not allow the join, per se, to empty out the type.
+ if (jt->empty() && !t->empty()) {
+ // This can happen if an interface-typed array narrows to a class type.
+ jt = _type;
+ }
+ return jt;
+ }
+ }
+ } else if (tp->base() == Type::InstPtr) {
+ assert( off != Type::OffsetBot ||
+ // arrays can be cast to Objects
+ tp->is_oopptr()->klass()->is_java_lang_Object() ||
+ // unsafe field access may not have a constant offset
+ phase->C->has_unsafe_access(),
+ "Field accesses must be precise" );
+ // For oop loads, we expect the _type to be precise
+ } else if (tp->base() == Type::KlassPtr) {
+ assert( off != Type::OffsetBot ||
+ // arrays can be cast to Objects
+ tp->is_klassptr()->klass()->is_java_lang_Object() ||
+ // also allow array-loading from the primary supertype
+ // array during subtype checks
+ Opcode() == Op_LoadKlass,
+ "Field accesses must be precise" );
+ // For klass/static loads, we expect the _type to be precise
+ }
+
+ const TypeKlassPtr *tkls = tp->isa_klassptr();
+ if (tkls != NULL && !StressReflectiveCode) {
+ ciKlass* klass = tkls->klass();
+ if (klass->is_loaded() && tkls->klass_is_exact()) {
+ // We are loading a field from a Klass metaobject whose identity
+ // is known at compile time (the type is "exact" or "precise").
+ // Check for fields we know are maintained as constants by the VM.
+ if (tkls->offset() == Klass::super_check_offset_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_super_check_offset. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadI, "must load an int from _super_check_offset");
+ return TypeInt::make(klass->super_check_offset());
+ }
+ // Compute index into primary_supers array
+ juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ // Check for overflowing; use unsigned compare to handle the negative case.
+ if( depth < ciKlass::primary_super_limit() ) {
+ // The field is an element of Klass::_primary_supers. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
+ ciKlass *ss = klass->super_of_depth(depth);
+ return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
+ }
+ const Type* aift = load_array_final_field(tkls, klass);
+ if (aift != NULL) return aift;
+ if (tkls->offset() == in_bytes(arrayKlass::component_mirror_offset()) + (int)sizeof(oopDesc)
+ && klass->is_array_klass()) {
+ // The field is arrayKlass::_component_mirror. Return its (constant) value.
+ // (Folds up aClassConstant.getComponentType, common in Arrays.copyOf.)
+ assert(Opcode() == Op_LoadP, "must load an oop from _component_mirror");
+ return TypeInstPtr::make(klass->as_array_klass()->component_mirror());
+ }
+ if (tkls->offset() == Klass::java_mirror_offset_in_bytes() + (int)sizeof(oopDesc)) {
+ // The field is Klass::_java_mirror. Return its (constant) value.
+ // (Folds up the 2nd indirection in anObjConstant.getClass().)
+ assert(Opcode() == Op_LoadP, "must load an oop from _java_mirror");
+ return TypeInstPtr::make(klass->java_mirror());
+ }
+ }
+
+ // We can still check if we are loading from the primary_supers array at a
+ // shallow enough depth. Even though the klass is not exact, entries less
+ // than or equal to its super depth are correct.
+ if (klass->is_loaded() ) {
+ ciType *inner = klass->klass();
+ while( inner->is_obj_array_klass() )
+ inner = inner->as_obj_array_klass()->base_element_type();
+ if( inner->is_instance_klass() &&
+ !inner->as_instance_klass()->flags().is_interface() ) {
+ // Compute index into primary_supers array
+ juint depth = (tkls->offset() - (Klass::primary_supers_offset_in_bytes() + (int)sizeof(oopDesc))) / sizeof(klassOop);
+ // Check for overflowing; use unsigned compare to handle the negative case.
+ if( depth < ciKlass::primary_super_limit() &&
+ depth <= klass->super_depth() ) { // allow self-depth checks to handle self-check case
+ // The field is an element of Klass::_primary_supers. Return its (constant) value.
+ // (Folds up type checking code.)
+ assert(Opcode() == Op_LoadKlass, "must load a klass from _primary_supers");
+ ciKlass *ss = klass->super_of_depth(depth);
+ return ss ? TypeKlassPtr::make(ss) : TypePtr::NULL_PTR;
+ }
+ }
+ }
+
+ // If the type is enough to determine that the thing is not an array,
+ // we can give the layout_helper a positive interval type.
+ // This will help short-circuit some reflective code.
+ if (tkls->offset() == Klass::layout_helper_offset_in_bytes() + (int)sizeof(oopDesc)
+ && !klass->is_array_klass() // not directly typed as an array
+ && !klass->is_interface() // specifically not Serializable & Cloneable
+ && !klass->is_java_lang_Object() // not the supertype of all T[]
+ ) {
+ // Note: When interfaces are reliable, we can narrow the interface
+ // test to (klass != Serializable && klass != Cloneable).
+ assert(Opcode() == Op_LoadI, "must load an int from _layout_helper");
+ jint min_size = Klass::instance_layout_helper(oopDesc::header_size(), false);
+ // The key property of this type is that it folds up tests
+ // for array-ness, since it proves that the layout_helper is positive.
+ // Thus, a generic value like the basic object layout helper works fine.
+ return TypeInt::make(min_size, max_jint, Type::WidenMin);
+ }
+ }
+
+ // If we are loading from a freshly-allocated object, produce a zero,
+ // if the load is provably beyond the header of the object.
+ // (Also allow a variable load from a fresh array to produce zero.)
+ if (ReduceFieldZeroing) {
+ Node* value = can_see_stored_value(mem,phase);
+ if (value != NULL && value->is_Con())
+ return value->bottom_type();
+ }
+
+ return _type;
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Match only the address.
+uint LoadNode::match_edge(uint idx) const {
+ return idx == MemNode::Address;
+}
+
+//--------------------------LoadBNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a byte, replace this load
+// with the value stored truncated to a byte. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadBNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) ) {
+ Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(24)) );
+ return new (phase->C, 3) RShiftINode(result, phase->intcon(24));
+ }
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
+
+//--------------------------LoadCNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a char, replace this load
+// with the value stored truncated to a char. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadCNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) )
+ return new (phase->C, 3) AndINode(value,phase->intcon(0xFFFF));
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
+
+//--------------------------LoadSNode::Ideal--------------------------------------
+//
+// If the previous store is to the same address as this load,
+// and the value stored was larger than a short, replace this load
+// with the value stored truncated to a short. If no truncation is
+// needed, the replacement is done in LoadNode::Identity().
+//
+Node *LoadSNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* mem = in(MemNode::Memory);
+ Node* value = can_see_stored_value(mem,phase);
+ if( value && !phase->type(value)->higher_equal( _type ) ) {
+ Node *result = phase->transform( new (phase->C, 3) LShiftINode(value, phase->intcon(16)) );
+ return new (phase->C, 3) RShiftINode(result, phase->intcon(16));
+ }
+ // Identity call will handle the case where truncation is not needed.
+ return LoadNode::Ideal(phase, can_reshape);
+}
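+
+// The three Ideal routines above differ only in how they re-narrow the stored
+// value (illustration only):
+//   LoadB: (v << 24) >> 24    sign-extend the low 8 bits
+//   LoadC:  v & 0xFFFF        zero-extend the low 16 bits
+//   LoadS: (v << 16) >> 16    sign-extend the low 16 bits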
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+const Type *LoadKlassNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if (t1 == Type::TOP) return Type::TOP;
+ Node *adr = in(MemNode::Address);
+ const Type *t2 = phase->type( adr );
+ if (t2 == Type::TOP) return Type::TOP;
+ const TypePtr *tp = t2->is_ptr();
+ if (TypePtr::above_centerline(tp->ptr()) ||
+ tp->ptr() == TypePtr::Null) return Type::TOP;
+
+ // Return a more precise klass, if possible
+ const TypeInstPtr *tinst = tp->isa_instptr();
+ if (tinst != NULL) {
+ ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
+ int offset = tinst->offset();
+ if (ik == phase->C->env()->Class_klass()
+ && (offset == java_lang_Class::klass_offset_in_bytes() ||
+ offset == java_lang_Class::array_klass_offset_in_bytes())) {
+ // We are loading a special hidden field from a Class mirror object,
+ // the field which points to the VM's Klass metaobject.
+ ciType* t = tinst->java_mirror_type();
+ // java_mirror_type returns non-null for compile-time Class constants.
+ if (t != NULL) {
+ // constant oop => constant klass
+ if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
+ return TypeKlassPtr::make(ciArrayKlass::make(t));
+ }
+ if (!t->is_klass()) {
+ // a primitive Class (e.g., int.class) has NULL for a klass field
+ return TypePtr::NULL_PTR;
+ }
+ // (Folds up the 1st indirection in aClassConstant.getModifiers().)
+ return TypeKlassPtr::make(t->as_klass());
+ }
+ // non-constant mirror, so we can't tell what's going on
+ }
+ if( !ik->is_loaded() )
+ return _type; // Bail out if not loaded
+ if (offset == oopDesc::klass_offset_in_bytes()) {
+ if (tinst->klass_is_exact()) {
+ return TypeKlassPtr::make(ik);
+ }
+ // See if we can become precise: no subklasses and no interface
+ // (Note: We need to support verified interfaces.)
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ // Add a dependence; if any subclass added we need to recompile
+ if (!ik->is_final()) {
+ // %%% should use stronger assert_unique_concrete_subtype instead
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ // Return precise klass
+ return TypeKlassPtr::make(ik);
+ }
+
+ // Return root of possible klass
+ return TypeKlassPtr::make(TypePtr::NotNull, ik, 0/*offset*/);
+ }
+ }
+
+ // Check for loading klass from an array
+ const TypeAryPtr *tary = tp->isa_aryptr();
+ if( tary != NULL ) {
+ ciKlass *tary_klass = tary->klass();
+ if (tary_klass != NULL // can be NULL when at BOTTOM or TOP
+ && tary->offset() == oopDesc::klass_offset_in_bytes()) {
+ if (tary->klass_is_exact()) {
+ return TypeKlassPtr::make(tary_klass);
+ }
+ ciArrayKlass *ak = tary->klass()->as_array_klass();
+ // If the klass is an object array, we defer the question to the
+ // array component klass.
+ if( ak->is_obj_array_klass() ) {
+ assert( ak->is_loaded(), "" );
+ ciKlass *base_k = ak->as_obj_array_klass()->base_element_klass();
+ if( base_k->is_loaded() && base_k->is_instance_klass() ) {
+ ciInstanceKlass* ik = base_k->as_instance_klass();
+ // See if we can become precise: no subklasses and no interface
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ // Add a dependence; if any subclass added we need to recompile
+ if (!ik->is_final()) {
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ // Return precise array klass
+ return TypeKlassPtr::make(ak);
+ }
+ }
+ return TypeKlassPtr::make(TypePtr::NotNull, ak, 0/*offset*/);
+ } else { // Found a type-array?
+ //assert(!UseExactTypes, "this code should be useless with exact types");
+ assert( ak->is_type_array_klass(), "" );
+ return TypeKlassPtr::make(ak); // These are always precise
+ }
+ }
+ }
+
+ // Check for loading klass from an array klass
+ const TypeKlassPtr *tkls = tp->isa_klassptr();
+ if (tkls != NULL && !StressReflectiveCode) {
+ ciKlass* klass = tkls->klass();
+ if( !klass->is_loaded() )
+ return _type; // Bail out if not loaded
+ if( klass->is_obj_array_klass() &&
+ (uint)tkls->offset() == objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc)) {
+ ciKlass* elem = klass->as_obj_array_klass()->element_klass();
+ // // Always returning precise element type is incorrect,
+ // // e.g., element type could be object and array may contain strings
+ // return TypeKlassPtr::make(TypePtr::Constant, elem, 0);
+
+ // The array's TypeKlassPtr was declared 'precise' or 'not precise'
+ // according to the element type's subclassing.
+ return TypeKlassPtr::make(tkls->ptr(), elem, 0/*offset*/);
+ }
+ if( klass->is_instance_klass() && tkls->klass_is_exact() &&
+ (uint)tkls->offset() == Klass::super_offset_in_bytes() + sizeof(oopDesc)) {
+ ciKlass* sup = klass->as_instance_klass()->super();
+ // The field is Klass::_super. Return its (constant) value.
+ // (Folds up the 2nd indirection in aClassConstant.getSuperClass().)
+ return sup ? TypeKlassPtr::make(sup) : TypePtr::NULL_PTR;
+ }
+ }
+
+ // Bailout case
+ return LoadNode::Value(phase);
+}
+
+//------------------------------Identity---------------------------------------
+// To clean up reflective code, simplify k.java_mirror.as_klass to plain k.
+// Also feed through the klass in Allocate(...klass...)._klass.
+Node* LoadKlassNode::Identity( PhaseTransform *phase ) {
+ Node* x = LoadNode::Identity(phase);
+ if (x != this) return x;
+
+ // Take apart the address into an oop and an offset.
+ // Return 'this' if we cannot.
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ if (base == NULL) return this;
+ const TypeOopPtr* toop = phase->type(adr)->isa_oopptr();
+ if (toop == NULL) return this;
+
+ // We can fetch the klass directly through an AllocateNode.
+ // This works even if the klass is not constant (clone or newArray).
+ if (offset == oopDesc::klass_offset_in_bytes()) {
+ Node* allocated_klass = AllocateNode::Ideal_klass(base, phase);
+ if (allocated_klass != NULL) {
+ return allocated_klass;
+ }
+ }
+
+ // Simplify k.java_mirror.as_klass to plain k, where k is a klassOop.
+ // Simplify ak.component_mirror.array_klass to plain ak, ak an arrayKlass.
+ // See inline_native_Class_query for occurrences of these patterns.
+ // Java Example: x.getClass().isAssignableFrom(y)
+ // Java Example: Array.newInstance(x.getClass().getComponentType(), n)
+ //
+ // This improves reflective code, often making the Class
+ // mirror go completely dead. (Current exception: Class
+ // mirrors may appear in debug info, but we could clean them out by
+ // introducing a new debug info operator for klassOop.java_mirror).
+ if (toop->isa_instptr() && toop->klass() == phase->C->env()->Class_klass()
+ && (offset == java_lang_Class::klass_offset_in_bytes() ||
+ offset == java_lang_Class::array_klass_offset_in_bytes())) {
+ // We are loading a special hidden field from a Class mirror,
+ // the field which points to its Klass or arrayKlass metaobject.
+ if (base->is_Load()) {
+ Node* adr2 = base->in(MemNode::Address);
+ const TypeKlassPtr* tkls = phase->type(adr2)->isa_klassptr();
+ if (tkls != NULL && !tkls->empty()
+ && (tkls->klass()->is_instance_klass() ||
+ tkls->klass()->is_array_klass())
+ && adr2->is_AddP()
+ ) {
+ int mirror_field = Klass::java_mirror_offset_in_bytes();
+ if (offset == java_lang_Class::array_klass_offset_in_bytes()) {
+ mirror_field = in_bytes(arrayKlass::component_mirror_offset());
+ }
+ if (tkls->offset() == mirror_field + (int)sizeof(oopDesc)) {
+ return adr2->in(AddPNode::Base);
+ }
+ }
+ }
+ }
+
+ return this;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *LoadRangeNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ Node *adr = in(MemNode::Address);
+ const Type *t2 = phase->type( adr );
+ if( t2 == Type::TOP ) return Type::TOP;
+ const TypePtr *tp = t2->is_ptr();
+ if (TypePtr::above_centerline(tp->ptr())) return Type::TOP;
+ const TypeAryPtr *tap = tp->isa_aryptr();
+ if( !tap ) return _type;
+ return tap->size();
+}
+
+//------------------------------Identity---------------------------------------
+// Feed through the length in AllocateArray(...length...)._length.
+Node* LoadRangeNode::Identity( PhaseTransform *phase ) {
+ Node* x = LoadINode::Identity(phase);
+ if (x != this) return x;
+
+ // Take apart the address into an oop and an offset.
+ // Return 'this' if we cannot.
+ Node* adr = in(MemNode::Address);
+ intptr_t offset = 0;
+ Node* base = AddPNode::Ideal_base_and_offset(adr, phase, offset);
+ if (base == NULL) return this;
+ const TypeAryPtr* tary = phase->type(adr)->isa_aryptr();
+ if (tary == NULL) return this;
+
+ // We can fetch the length directly through an AllocateArrayNode.
+ // This works even if the length is not constant (clone or newArray).
+ if (offset == arrayOopDesc::length_offset_in_bytes()) {
+ Node* allocated_length = AllocateArrayNode::Ideal_length(base, phase);
+ if (allocated_length != NULL) {
+ return allocated_length;
+ }
+ }
+
+ return this;
+
+}
+//=============================================================================
+//---------------------------StoreNode::make-----------------------------------
+// Polymorphic factory method:
+StoreNode* StoreNode::make( Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val, BasicType bt ) {
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return new (C, 4) StoreBNode(ctl, mem, adr, adr_type, val);
+ case T_INT: return new (C, 4) StoreINode(ctl, mem, adr, adr_type, val);
+ case T_CHAR:
+ case T_SHORT: return new (C, 4) StoreCNode(ctl, mem, adr, adr_type, val);
+ case T_LONG: return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val);
+ case T_FLOAT: return new (C, 4) StoreFNode(ctl, mem, adr, adr_type, val);
+ case T_DOUBLE: return new (C, 4) StoreDNode(ctl, mem, adr, adr_type, val);
+ case T_ADDRESS:
+ case T_OBJECT: return new (C, 4) StorePNode(ctl, mem, adr, adr_type, val);
+ }
+ ShouldNotReachHere();
+ return (StoreNode*)NULL;
+}
+
+StoreLNode* StoreLNode::make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val) {
+ bool require_atomic = true;
+ return new (C, 4) StoreLNode(ctl, mem, adr, adr_type, val, require_atomic);
+}
+
+
+//--------------------------bottom_type----------------------------------------
+const Type *StoreNode::bottom_type() const {
+ return Type::MEMORY;
+}
+
+//------------------------------hash-------------------------------------------
+uint StoreNode::hash() const {
+ // unroll addition of interesting fields
+ //return (uintptr_t)in(Control) + (uintptr_t)in(Memory) + (uintptr_t)in(Address) + (uintptr_t)in(ValueIn);
+
+ // Since they are not commoned, do not hash them:
+ return NO_HASH;
+}
+
+//------------------------------Ideal------------------------------------------
+// Change back-to-back Store(, p, x) -> Store(m, p, y) to Store(m, p, x).
+// When a store immediately follows a relevant allocation/initialization,
+// try to capture it into the initialization, or hoist it above.
+Node *StoreNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node* p = MemNode::Ideal_common(phase, can_reshape);
+ if (p) return (p == NodeSentinel) ? NULL : p;
+
+ Node* mem = in(MemNode::Memory);
+ Node* address = in(MemNode::Address);
+
+ // Back-to-back stores to same address? Fold em up.
+ // Generally unsafe if I have intervening uses...
+ if (mem->is_Store() && phase->eqv_uncast(mem->in(MemNode::Address), address)) {
+ // Looking at a dead closed cycle of memory?
+ assert(mem != mem->in(MemNode::Memory), "dead loop in StoreNode::Ideal");
+
+ assert(Opcode() == mem->Opcode() ||
+ phase->C->get_alias_index(adr_type()) == Compile::AliasIdxRaw,
+ "no mismatched stores, except on raw memory");
+
+ if (mem->outcnt() == 1 && // check for intervening uses
+ mem->as_Store()->memory_size() <= this->memory_size()) {
+ // If anybody other than 'this' uses 'mem', we cannot fold 'mem' away.
+ // For example, 'mem' might be the final state at a conditional return.
+ // Or, 'mem' might be used by some node which is live at the same time
+ // 'this' is live, which might be unschedulable. So, require exactly
+ // ONE user, the 'this' store, until such time as we clone 'mem' for
+ // each of 'mem's uses (thus making the exactly-1-user-rule hold true).
+ if (can_reshape) { // (%%% is this an anachronism?)
+ set_req_X(MemNode::Memory, mem->in(MemNode::Memory),
+ phase->is_IterGVN());
+ } else {
+ // It's OK to do this in the parser, since DU info is always accurate,
+ // and the parser always refers to nodes via SafePointNode maps.
+ set_req(MemNode::Memory, mem->in(MemNode::Memory));
+ }
+ return this;
+ }
+ }
+
+ // Capture an unaliased, unconditional, simple store into an initializer.
+ // Or, if it is independent of the allocation, hoist it above the allocation.
+ if (ReduceFieldZeroing && /*can_reshape &&*/
+ mem->is_Proj() && mem->in(0)->is_Initialize()) {
+ InitializeNode* init = mem->in(0)->as_Initialize();
+ intptr_t offset = init->can_capture_store(this, phase);
+ if (offset > 0) {
+ Node* moved = init->capture_store(this, offset, phase);
+ // If the InitializeNode captured me, it made a raw copy of me,
+ // and I need to disappear.
+ if (moved != NULL) {
+ // %%% hack to ensure that Ideal returns a new node:
+ mem = MergeMemNode::make(phase->C, mem);
+ return mem; // fold me away
+ }
+ }
+ }
+
+ return NULL; // No further progress
+}
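+
+// The back-to-back store fold, sketched (illustration only):
+//   st1 = StoreI(ctl, mem, p, y)      // st1's only user is st2
+//   st2 = StoreI(ctl, st1, p, x)
+// st2 takes mem directly as its memory input, leaving st1 dead.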
+
+//------------------------------Value-----------------------------------------
+const Type *StoreNode::Value( PhaseTransform *phase ) const {
+ // Either input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::Memory) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type *t2 = phase->type( in(MemNode::Address) );
+ if( t2 == Type::TOP ) return Type::TOP;
+ const Type *t3 = phase->type( in(MemNode::ValueIn) );
+ if( t3 == Type::TOP ) return Type::TOP;
+ return Type::MEMORY;
+}
+
+//------------------------------Identity---------------------------------------
+// Remove redundant stores:
+// Store(m, p, Load(m, p)) changes to m.
+// Store(, p, x) -> Store(m, p, x) changes to Store(m, p, x).
+Node *StoreNode::Identity( PhaseTransform *phase ) {
+ Node* mem = in(MemNode::Memory);
+ Node* adr = in(MemNode::Address);
+ Node* val = in(MemNode::ValueIn);
+
+ // Load then Store? Then the Store is useless
+ if (val->is_Load() &&
+ phase->eqv_uncast( val->in(MemNode::Address), adr ) &&
+ phase->eqv_uncast( val->in(MemNode::Memory ), mem ) &&
+ val->as_Load()->store_Opcode() == Opcode()) {
+ return mem;
+ }
+
+ // Two stores in a row of the same value?
+ if (mem->is_Store() &&
+ phase->eqv_uncast( mem->in(MemNode::Address), adr ) &&
+ phase->eqv_uncast( mem->in(MemNode::ValueIn), val ) &&
+ mem->Opcode() == Opcode()) {
+ return mem;
+ }
+
+ // Store of zero anywhere into a freshly-allocated object?
+ // Then the store is useless.
+ // (It must already have been captured by the InitializeNode.)
+ if (ReduceFieldZeroing && phase->type(val)->is_zero_type()) {
+ // a newly allocated object is already all-zeroes everywhere
+ if (mem->is_Proj() && mem->in(0)->is_Allocate()) {
+ return mem;
+ }
+
+ // the store may also apply to zero-bits in an earlier object
+ Node* prev_mem = find_previous_store(phase);
+ // Steps (a), (b): Walk past independent stores to find an exact match.
+ if (prev_mem != NULL) {
+ Node* prev_val = can_see_stored_value(prev_mem, phase);
+ if (prev_val != NULL && phase->eqv(prev_val, val)) {
+ // prev_val and val might differ by a cast; it would be good
+ // to keep the more informative of the two.
+ return mem;
+ }
+ }
+ }
+
+ return this;
+}
+
+//------------------------------match_edge-------------------------------------
+ // Do we Match on this edge index or not? Match only address & value
+uint StoreNode::match_edge(uint idx) const {
+ return idx == MemNode::Address || idx == MemNode::ValueIn;
+}
+
+//------------------------------cmp--------------------------------------------
+// Do not common stores up together. They generally have to be split
+// back up anyways, so do not bother.
+uint StoreNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Ideal_masked_input-----------------------------
+// Check for a useless mask before a partial-word store
+// (StoreB ... (AndI valIn conIa) )
+// If (conIa & mask == mask) this simplifies to
+// (StoreB ... (valIn) )
+Node *StoreNode::Ideal_masked_input(PhaseGVN *phase, uint mask) {
+ Node *val = in(MemNode::ValueIn);
+ if( val->Opcode() == Op_AndI ) {
+ const TypeInt *t = phase->type( val->in(2) )->isa_int();
+ if( t && t->is_con() && (t->get_con() & mask) == mask ) {
+ set_req(MemNode::ValueIn, val->in(1));
+ return this;
+ }
+ }
+ return NULL;
+}
+
+
+//------------------------------Ideal_sign_extended_input----------------------
+// Check for useless sign-extension before a partial-word store
+// (StoreB ... (RShiftI _ (LShiftI _ valIn conIL ) conIR) )
+// If (conIL == conIR && conIR <= num_bits) this simplifies to
+// (StoreB ... (valIn) )
+Node *StoreNode::Ideal_sign_extended_input(PhaseGVN *phase, int num_bits) {
+ Node *val = in(MemNode::ValueIn);
+ if( val->Opcode() == Op_RShiftI ) {
+ const TypeInt *t = phase->type( val->in(2) )->isa_int();
+ if( t && t->is_con() && (t->get_con() <= num_bits) ) {
+ Node *shl = val->in(1);
+ if( shl->Opcode() == Op_LShiftI ) {
+ const TypeInt *t2 = phase->type( shl->in(2) )->isa_int();
+ if( t2 && t2->is_con() && (t2->get_con() == t->get_con()) ) {
+ set_req(MemNode::ValueIn, shl->in(1));
+ return this;
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+//------------------------------value_never_loaded-----------------------------------
+// Determine whether there are any possible loads of the value stored.
+// For simplicity, we actually check if there are any loads from the
+// address stored to, not just for loads of the value stored by this node.
+//
+bool StoreNode::value_never_loaded( PhaseTransform *phase) const {
+ Node *adr = in(Address);
+ const TypeOopPtr *adr_oop = phase->type(adr)->isa_oopptr();
+ if (adr_oop == NULL)
+ return false;
+ if (!adr_oop->is_instance())
+ return false; // if not a distinct instance, there may be aliases of the address
+ for (DUIterator_Fast imax, i = adr->fast_outs(imax); i < imax; i++) {
+ Node *use = adr->fast_out(i);
+ if (use->is_Load() || use->is_LoadStore()) {
+ return false;
+ }
+ }
+ return true;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// If the store is from an AND mask that leaves the low bits untouched, then
+// we can skip the AND operation. If the store is from a sign-extension
+// (a left shift, then right shift) we can skip both.
+Node *StoreBNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *progress = StoreNode::Ideal_masked_input(phase, 0xFF);
+ if( progress != NULL ) return progress;
+
+ progress = StoreNode::Ideal_sign_extended_input(phase, 24);
+ if( progress != NULL ) return progress;
+
+ // Finally check the default case
+ return StoreNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// If the store is from an AND mask that leaves the low bits untouched, then
+// we can skip the AND operation
+Node *StoreCNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *progress = StoreNode::Ideal_masked_input(phase, 0xFFFF);
+ if( progress != NULL ) return progress;
+
+ progress = StoreNode::Ideal_sign_extended_input(phase, 16);
+ if( progress != NULL ) return progress;
+
+ // Finally check the default case
+ return StoreNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *StoreCMNode::Identity( PhaseTransform *phase ) {
+ // No need to card mark when storing a null ptr
+ Node* my_store = in(MemNode::OopStore);
+ if (my_store->is_Store()) {
+ const Type *t1 = phase->type( my_store->in(MemNode::ValueIn) );
+ if( t1 == TypePtr::NULL_PTR ) {
+ return in(MemNode::Memory);
+ }
+ }
+ return this;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *StoreCMNode::Value( PhaseTransform *phase ) const {
+ // If extra input is TOP ==> the result is TOP
+ const Type *t1 = phase->type( in(MemNode::OopStore) );
+ if( t1 == Type::TOP ) return Type::TOP;
+
+ return StoreNode::Value( phase );
+}
+
+
+//=============================================================================
+//----------------------------------SCMemProjNode------------------------------
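+// SCMemProjNode is the memory projection of a LoadStore (atomic
+// read-modify-write) node; its Value is simply the node's bottom type.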
+const Type * SCMemProjNode::Value( PhaseTransform *phase ) const {
+ return bottom_type();
+}
+
+//=============================================================================
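+// LoadStoreNode is the base class for atomic read-modify-write operations
+// (e.g., compare-and-swap).  Control, Memory, Address and ValueIn follow the
+// usual MemNode edge layout, and ExpectedIn carries the expected old value.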
+LoadStoreNode::LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex ) : Node(5) {
+ init_req(MemNode::Control, c );
+ init_req(MemNode::Memory , mem);
+ init_req(MemNode::Address, adr);
+ init_req(MemNode::ValueIn, val);
+ init_req( ExpectedIn, ex );
+ init_class_id(Class_LoadStore);
+}
+
+//=============================================================================
+//-------------------------------adr_type--------------------------------------
+ // Compute the memory address type from the destination address (in(3)).
+const TypePtr* ClearArrayNode::adr_type() const {
+ Node *adr = in(3);
+ return MemNode::calculate_adr_type(adr->bottom_type());
+}
+
+//------------------------------match_edge-------------------------------------
+// Do we Match on this edge index or not? Do not match memory
+uint ClearArrayNode::match_edge(uint idx) const {
+ return idx > 1;
+}
+
+//------------------------------Identity---------------------------------------
+// Clearing a zero length array does nothing
+Node *ClearArrayNode::Identity( PhaseTransform *phase ) {
+ return phase->type(in(2))->higher_equal(TypeInt::ZERO) ? in(1) : this;
+}
+
+//------------------------------Idealize---------------------------------------
+// Clearing a short array is faster with stores
+Node *ClearArrayNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ const int unit = BytesPerLong;
+ const TypeX* t = phase->type(in(2))->isa_intptr_t();
+ if (!t) return NULL;
+ if (!t->is_con()) return NULL;
+ intptr_t raw_count = t->get_con();
+ intptr_t size = raw_count;
+ if (!Matcher::init_array_count_is_in_bytes) size *= unit;
+ // Clearing nothing uses the Identity call.
+ // Negative clears are possible on dead ClearArrays
+ // (see jck test stmt114.stmt11402.val).
+ if (size <= 0 || size % unit != 0) return NULL;
+ intptr_t count = size / unit;
+ // Length too long; use fast hardware clear
+ if (size > Matcher::init_array_short_size) return NULL;
+ Node *mem = in(1);
+ if( phase->type(mem)==Type::TOP ) return NULL;
+ Node *adr = in(3);
+ const Type* at = phase->type(adr);
+ if( at==Type::TOP ) return NULL;
+ const TypePtr* atp = at->isa_ptr();
+ // adjust atp to be the correct array element address type
+ if (atp == NULL) atp = TypePtr::BOTTOM;
+ else atp = atp->add_offset(Type::OffsetBot);
+ // Get base for derived pointer purposes
+ if( adr->Opcode() != Op_AddP ) Unimplemented();
+ Node *base = adr->in(1);
+
+ Node *zero = phase->makecon(TypeLong::ZERO);
+ Node *off = phase->MakeConX(BytesPerLong);
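+ // Replace the ClearArray with a chain of 'count' zeroing long-stores,
+ // advancing the address by BytesPerLong for each store after the first.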
+ mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
+ count--;
+ while( count-- ) {
+ mem = phase->transform(mem);
+ adr = phase->transform(new (phase->C, 4) AddPNode(base,adr,off));
+ mem = new (phase->C, 4) StoreLNode(in(0),mem,adr,atp,zero);
+ }
+ return mem;
+}
+
+//----------------------------clear_memory-------------------------------------
+// Generate code to initialize object storage to zero.
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ intptr_t start_offset,
+ Node* end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ intptr_t offset = start_offset;
+
+ int unit = BytesPerLong;
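+ // If the start offset is not long-aligned, emit one 32-bit zero store
+ // to round it up to a long boundary before the bulk clear below.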
+ if ((offset % unit) != 0) {
+ Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(offset));
+ adr = phase->transform(adr);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+ mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+ mem = phase->transform(mem);
+ offset += BytesPerInt;
+ }
+ assert((offset % unit) == 0, "");
+
+ // Initialize the remaining stuff, if any, with a ClearArray.
+ return clear_memory(ctl, mem, dest, phase->MakeConX(offset), end_offset, phase);
+}
+
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ Node* start_offset,
+ Node* end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ int unit = BytesPerLong;
+ Node* zbase = start_offset;
+ Node* zend = end_offset;
+
+ // Scale to the unit required by the CPU:
+ if (!Matcher::init_array_count_is_in_bytes) {
+ Node* shift = phase->intcon(exact_log2(unit));
+ zbase = phase->transform( new(C,3) URShiftXNode(zbase, shift) );
+ zend = phase->transform( new(C,3) URShiftXNode(zend, shift) );
+ }
+
+ Node* zsize = phase->transform( new(C,3) SubXNode(zend, zbase) );
+ Node* zinit = phase->zerocon((unit == BytesPerLong) ? T_LONG : T_INT);
+
+ // Bulk clear double-words
+ Node* adr = phase->transform( new(C,4) AddPNode(dest, dest, start_offset) );
+ mem = new (C, 4) ClearArrayNode(ctl, mem, zsize, adr);
+ return phase->transform(mem);
+}
+
+Node* ClearArrayNode::clear_memory(Node* ctl, Node* mem, Node* dest,
+ intptr_t start_offset,
+ intptr_t end_offset,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+ assert((end_offset % BytesPerInt) == 0, "odd end offset");
+ intptr_t done_offset = end_offset;
+ if ((done_offset % BytesPerLong) != 0) {
+ done_offset -= BytesPerInt;
+ }
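+ // 'done_offset' is end_offset rounded down to a long boundary; the
+ // long-aligned middle is cleared in bulk, and any trailing int is
+ // zeroed with a single 32-bit store below.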
+ if (done_offset > start_offset) {
+ mem = clear_memory(ctl, mem, dest,
+ start_offset, phase->MakeConX(done_offset), phase);
+ }
+ if (done_offset < end_offset) { // emit the final 32-bit store
+ Node* adr = new (C, 4) AddPNode(dest, dest, phase->MakeConX(done_offset));
+ adr = phase->transform(adr);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+ mem = StoreNode::make(C, ctl, mem, adr, atp, phase->zerocon(T_INT), T_INT);
+ mem = phase->transform(mem);
+ done_offset += BytesPerInt;
+ }
+ assert(done_offset == end_offset, "");
+ return mem;
+}
+
+//=============================================================================
+// Do we match on this edge? No memory edges
+uint StrCompNode::match_edge(uint idx) const {
+ return idx == 5 || idx == 6;
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *StrCompNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+
+//=============================================================================
+MemBarNode::MemBarNode(Compile* C, int alias_idx, Node* precedent)
+ : MultiNode(TypeFunc::Parms + (precedent == NULL? 0: 1)),
+ _adr_type(C->get_adr_type(alias_idx))
+{
+ init_class_id(Class_MemBar);
+ Node* top = C->top();
+ init_req(TypeFunc::I_O,top);
+ init_req(TypeFunc::FramePtr,top);
+ init_req(TypeFunc::ReturnAdr,top);
+ if (precedent != NULL)
+ init_req(TypeFunc::Parms, precedent);
+}
+
+//------------------------------cmp--------------------------------------------
+uint MemBarNode::hash() const { return NO_HASH; }
+uint MemBarNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------make-------------------------------------------
+MemBarNode* MemBarNode::make(Compile* C, int opcode, int atp, Node* pn) {
+ int len = Precedent + (pn == NULL? 0: 1);
+ switch (opcode) {
+ case Op_MemBarAcquire: return new(C, len) MemBarAcquireNode(C, atp, pn);
+ case Op_MemBarRelease: return new(C, len) MemBarReleaseNode(C, atp, pn);
+ case Op_MemBarVolatile: return new(C, len) MemBarVolatileNode(C, atp, pn);
+ case Op_MemBarCPUOrder: return new(C, len) MemBarCPUOrderNode(C, atp, pn);
+ case Op_Initialize: return new(C, len) InitializeNode(C, atp, pn);
+ default: ShouldNotReachHere(); return NULL;
+ }
+}
+
+//------------------------------Ideal------------------------------------------
+// Return a node which is more "ideal" than the current node. Strip out
+// control copies
+Node *MemBarNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if (remove_dead_region(phase, can_reshape)) return this;
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *MemBarNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ if( phase->type(in(0)) == Type::TOP )
+ return Type::TOP;
+ return TypeTuple::MEMBAR;
+}
+
+//------------------------------match------------------------------------------
+// Construct projections for memory.
+Node *MemBarNode::match( const ProjNode *proj, const Matcher *m ) {
+ switch (proj->_con) {
+ case TypeFunc::Control:
+ case TypeFunc::Memory:
+ return new (m->C, 1) MachProjNode(this,proj->_con,RegMask::Empty,MachProjNode::unmatched_proj);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+//===========================InitializeNode====================================
+// SUMMARY:
+// This node acts as a memory barrier on raw memory, after some raw stores.
+// The 'cooked' oop value feeds from the Initialize, not the Allocation.
+// The Initialize can 'capture' suitably constrained stores as raw inits.
+// It can coalesce related raw stores into larger units (called 'tiles').
+// It can avoid zeroing new storage for memory units which have raw inits.
+// At macro-expansion, it is marked 'complete', and does not optimize further.
+//
+// EXAMPLE:
+// The object 'new short[2]' occupies 16 bytes in a 32-bit machine.
+// ctl = incoming control; mem* = incoming memory
+// (Note: A star * on a memory edge denotes I/O and other standard edges.)
+// First allocate uninitialized memory and fill in the header:
+// alloc = (Allocate ctl mem* 16 #short[].klass ...)
+// ctl := alloc.Control; mem* := alloc.Memory*
+// rawmem = alloc.Memory; rawoop = alloc.RawAddress
+// Then initialize to zero the non-header parts of the raw memory block:
+// init = (Initialize alloc.Control alloc.Memory* alloc.RawAddress)
+// ctl := init.Control; mem.SLICE(#short[*]) := init.Memory
+// After the initialize node executes, the object is ready for service:
+// oop := (CheckCastPP init.Control alloc.RawAddress #short[])
+// Suppose its body is immediately initialized as {1,2}:
+// store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
+// store2 = (StoreC init.Control store1 (+ oop 14) 2)
+// mem.SLICE(#short[*]) := store2
+//
+// DETAILS:
+// An InitializeNode collects and isolates object initialization after
+// an AllocateNode and before the next possible safepoint. As a
+// memory barrier (MemBarNode), it keeps critical stores from drifting
+// down past any safepoint or any publication of the allocation.
+// Before this barrier, a newly-allocated object may have uninitialized bits.
+// After this barrier, it may be treated as a real oop, and GC is allowed.
+//
+// The semantics of the InitializeNode include an implicit zeroing of
+// the new object from object header to the end of the object.
+// (The object header and end are determined by the AllocateNode.)
+//
+// Certain stores may be added as direct inputs to the InitializeNode.
+// These stores must update raw memory, and they must be to addresses
+// derived from the raw address produced by AllocateNode, and with
+// a constant offset. They must be ordered by increasing offset.
+// The first one is at in(RawStores), the last at in(req()-1).
+// Unlike most memory operations, they are not linked in a chain,
+// but are displayed in parallel as users of the rawmem output of
+// the allocation.
+//
+// (See comments in InitializeNode::capture_store, which continue
+// the example given above.)
+//
+// When the associated Allocate is macro-expanded, the InitializeNode
+// may be rewritten to optimize collected stores. A ClearArrayNode
+// may also be created at that point to represent any required zeroing.
+// The InitializeNode is then marked 'complete', prohibiting further
+// capturing of nearby memory operations.
+//
+// During macro-expansion, all captured initializations which store
+ // constant values of 32 bits or smaller are coalesced (if advantageous)
+ // into larger 'tiles' of 32 or 64 bits.  This allows an object to be
+// initialized in fewer memory operations. Memory words which are
+// covered by neither tiles nor non-constant stores are pre-zeroed
+// by explicit stores of zero. (The code shape happens to do all
+// zeroing first, then all other stores, with both sequences occurring
+// in order of ascending offsets.)
+//
+// Alternatively, code may be inserted between an AllocateNode and its
+// InitializeNode, to perform arbitrary initialization of the new object.
+// E.g., the object copying intrinsics insert complex data transfers here.
+ // The initialization must then be marked as 'complete' to disable the
+// built-in zeroing semantics and the collection of initializing stores.
+//
+// While an InitializeNode is incomplete, reads from the memory state
+// produced by it are optimizable if they match the control edge and
+// new oop address associated with the allocation/initialization.
+// They return a stored value (if the offset matches) or else zero.
+// A write to the memory state, if it matches control and address,
+// and if it is to a constant offset, may be 'captured' by the
+// InitializeNode. It is cloned as a raw memory operation and rewired
+// inside the initialization, to the raw oop produced by the allocation.
+// Operations on addresses which are provably distinct (e.g., to
+// other AllocateNodes) are allowed to bypass the initialization.
+//
+// The effect of all this is to consolidate object initialization
+// (both arrays and non-arrays, both piecewise and bulk) into a
+// single location, where it can be optimized as a unit.
+//
+// Only stores with an offset less than TrackedInitializationLimit words
+// will be considered for capture by an InitializeNode. This puts a
+// reasonable limit on the complexity of optimized initializations.
+
+//---------------------------InitializeNode------------------------------------
+InitializeNode::InitializeNode(Compile* C, int adr_type, Node* rawoop)
+ : MemBarNode(C, adr_type, rawoop),
+   _is_complete(false)
+{
+ init_class_id(Class_Initialize);
+
+ assert(adr_type == Compile::AliasIdxRaw, "only valid atp");
+ assert(in(RawAddress) == rawoop, "proper init");
+ // Note: allocation() can be NULL, for secondary initialization barriers
+}
+
+// Since this node is not matched, it will be processed by the
+// register allocator. Declare that there are no constraints
+// on the allocation of the RawAddress edge.
+const RegMask &InitializeNode::in_RegMask(uint idx) const {
+ // This edge should have been set to top by set_complete(), but be conservative.
+ if (idx == InitializeNode::RawAddress)
+ return *(Compile::current()->matcher()->idealreg2spillmask[in(idx)->ideal_reg()]);
+ return RegMask::Empty;
+}
+
+Node* InitializeNode::memory(uint alias_idx) {
+ Node* mem = in(Memory);
+ if (mem->is_MergeMem()) {
+ return mem->as_MergeMem()->memory_at(alias_idx);
+ } else {
+ // incoming raw memory is not split
+ return mem;
+ }
+}
+
+bool InitializeNode::is_non_zero() {
+ if (is_complete()) return false;
+ remove_extra_zeroes();
+ return (req() > RawStores);
+}
+
+void InitializeNode::set_complete(PhaseGVN* phase) {
+ assert(!is_complete(), "caller responsibility");
+ _is_complete = true;
+
+ // After this node is complete, it contains a bunch of
+ // raw-memory initializations. There is no need for
+ // it to have anything to do with non-raw memory effects.
+ // Therefore, tell all non-raw users to re-optimize themselves,
+ // after skipping the memory effects of this initialization.
+ PhaseIterGVN* igvn = phase->is_IterGVN();
+ if (igvn) igvn->add_users_to_worklist(this);
+}
+
+// convenience function
+// return false if the init contains any stores already
+bool AllocateNode::maybe_set_complete(PhaseGVN* phase) {
+ InitializeNode* init = initialization();
+ if (init == NULL || init->is_complete()) return false;
+ init->remove_extra_zeroes();
+ // for now, if this allocation has already collected any inits, bail:
+ if (init->is_non_zero()) return false;
+ init->set_complete(phase);
+ return true;
+}
+
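+// Compact the list of captured stores: drop edges that are top or the
+// trivial zero memory, so that in(RawStores)..in(req()-1) holds only
+// real captured stores.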
+void InitializeNode::remove_extra_zeroes() {
+ if (req() == RawStores) return;
+ Node* zmem = zero_memory();
+ uint fill = RawStores;
+ for (uint i = fill; i < req(); i++) {
+ Node* n = in(i);
+ if (n->is_top() || n == zmem) continue; // skip
+ if (fill < i) set_req(fill, n); // compact
+ ++fill;
+ }
+ // delete any empty spaces created:
+ while (fill < req()) {
+ del_req(fill);
+ }
+}
+
+// Helper for remembering which stores go with which offsets.
+intptr_t InitializeNode::get_store_offset(Node* st, PhaseTransform* phase) {
+ if (!st->is_Store()) return -1; // can happen to dead code via subsume_node
+ intptr_t offset = -1;
+ Node* base = AddPNode::Ideal_base_and_offset(st->in(MemNode::Address),
+ phase, offset);
+ if (base == NULL) return -1; // something is dead,
+ if (offset < 0) return -1; // dead, dead
+ return offset;
+}
+
+// Helper for proving that an initialization expression is
+// "simple enough" to be folded into an object initialization.
+// Attempts to prove that a store's initial value 'n' can be captured
+// within the initialization without creating a vicious cycle, such as:
+// { Foo p = new Foo(); p.next = p; }
+// True for constants and parameters and small combinations thereof.
+bool InitializeNode::detect_init_independence(Node* n,
+ bool st_is_pinned,
+ int& count) {
+ if (n == NULL) return true; // (can this really happen?)
+ if (n->is_Proj()) n = n->in(0);
+ if (n == this) return false; // found a cycle
+ if (n->is_Con()) return true;
+ if (n->is_Start()) return true; // params, etc., are OK
+ if (n->is_Root()) return true; // even better
+
+ Node* ctl = n->in(0);
+ if (ctl != NULL && !ctl->is_top()) {
+ if (ctl->is_Proj()) ctl = ctl->in(0);
+ if (ctl == this) return false;
+
+ // If we already know that the enclosing memory op is pinned right after
+ // the init, then any control flow that the store has picked up
+ // must have preceded the init, or else be equal to the init.
+ // Even after loop optimizations (which might change control edges)
+ // a store is never pinned *before* the availability of its inputs.
+ if (!MemNode::detect_dominating_control(ctl, this->in(0)))
+ return false; // failed to prove a good control
+
+ }
+
+ // Check data edges for possible dependencies on 'this'.
+ if ((count += 1) > 20) return false; // complexity limit
+ for (uint i = 1; i < n->req(); i++) {
+ Node* m = n->in(i);
+ if (m == NULL || m == n || m->is_top()) continue;
+ uint first_i = n->find_edge(m);
+ if (i != first_i) continue; // process duplicate edge just once
+ if (!detect_init_independence(m, st_is_pinned, count)) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+// Here are all the checks a Store must pass before it can be moved into
+// an initialization. Returns zero if a check fails.
+// On success, returns the (constant) offset to which the store applies,
+// within the initialized memory.
+intptr_t InitializeNode::can_capture_store(StoreNode* st, PhaseTransform* phase) {
+ const int FAIL = 0;
+ if (st->req() != MemNode::ValueIn + 1)
+ return FAIL; // an inscrutable StoreNode (card mark?)
+ Node* ctl = st->in(MemNode::Control);
+ if (!(ctl != NULL && ctl->is_Proj() && ctl->in(0) == this))
+ return FAIL; // must be unconditional after the initialization
+ Node* mem = st->in(MemNode::Memory);
+ if (!(mem->is_Proj() && mem->in(0) == this))
+ return FAIL; // must not be preceded by other stores
+ Node* adr = st->in(MemNode::Address);
+ intptr_t offset;
+ AllocateNode* alloc = AllocateNode::Ideal_allocation(adr, phase, offset);
+ if (alloc == NULL)
+ return FAIL; // inscrutable address
+ if (alloc != allocation())
+ return FAIL; // wrong allocation! (store needs to float up)
+ Node* val = st->in(MemNode::ValueIn);
+ int complexity_count = 0;
+ if (!detect_init_independence(val, true, complexity_count))
+ return FAIL; // stored value must be 'simple enough'
+
+ return offset; // success
+}
+
+// Find the captured store in(i) which corresponds to the range
+// [start..start+size) in the initialized object.
+// If there is one, return its index i. If there isn't, return the
+// negative of the index where it should be inserted.
+// Return 0 if the queried range overlaps an initialization boundary
+// or if dead code is encountered.
+// If size_in_bytes is zero, do not bother with overlap checks.
+int InitializeNode::captured_store_insertion_point(intptr_t start,
+ int size_in_bytes,
+ PhaseTransform* phase) {
+ const int FAIL = 0, MAX_STORE = BytesPerLong;
+
+ if (is_complete())
+ return FAIL; // arraycopy got here first; punt
+
+ assert(allocation() != NULL, "must be present");
+
+ // no negatives, no header fields:
+ if (start < (intptr_t) sizeof(oopDesc)) return FAIL;
+ if (start < (intptr_t) sizeof(arrayOopDesc) &&
+ start < (intptr_t) allocation()->minimum_header_size()) return FAIL;
+
+ // after a certain size, we bail out on tracking all the stores:
+ intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
+ if (start >= ti_limit) return FAIL;
+
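+ // Scan the captured stores, which are kept in ascending offset order,
+ // looking for an exact match, a conflict, or the insertion point.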
+ for (uint i = InitializeNode::RawStores, limit = req(); ; ) {
+ if (i >= limit) return -(int)i; // not found; here is where to put it
+
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) {
+ if (st != zero_memory()) {
+ return FAIL; // bail out if there is dead garbage
+ }
+ } else if (st_off > start) {
+ // ...we are done, since stores are ordered
+ if (st_off < start + size_in_bytes) {
+ return FAIL; // the next store overlaps
+ }
+ return -(int)i; // not found; here is where to put it
+ } else if (st_off < start) {
+ if (size_in_bytes != 0 &&
+ start < st_off + MAX_STORE &&
+ start < st_off + st->as_Store()->memory_size()) {
+ return FAIL; // the previous store overlaps
+ }
+ } else {
+ if (size_in_bytes != 0 &&
+ st->as_Store()->memory_size() != size_in_bytes) {
+ return FAIL; // mismatched store size
+ }
+ return i;
+ }
+
+ ++i;
+ }
+}
+
+// Look for a captured store which initializes at the offset 'start'
+// with the given size. If there is no such store, and no other
+// initialization interferes, then return zero_memory (the memory
+// projection of the AllocateNode).
+Node* InitializeNode::find_captured_store(intptr_t start, int size_in_bytes,
+ PhaseTransform* phase) {
+ assert(stores_are_sane(phase), "");
+ int i = captured_store_insertion_point(start, size_in_bytes, phase);
+ if (i == 0) {
+ return NULL; // something is dead
+ } else if (i < 0) {
+ return zero_memory(); // just primordial zero bits here
+ } else {
+ Node* st = in(i); // here is the store at this position
+ assert(get_store_offset(st->as_Store(), phase) == start, "sanity");
+ return st;
+ }
+}
+
+// Create, as a raw pointer, an address within my new object at 'offset'.
+Node* InitializeNode::make_raw_address(intptr_t offset,
+ PhaseTransform* phase) {
+ Node* addr = in(RawAddress);
+ if (offset != 0) {
+ Compile* C = phase->C;
+ addr = phase->transform( new (C, 4) AddPNode(C->top(), addr,
+ phase->MakeConX(offset)) );
+ }
+ return addr;
+}
+
+// Clone the given store, converting it into a raw store
+// initializing a field or element of my new object.
+// Caller is responsible for retiring the original store,
+// with subsume_node or the like.
+//
+// From the example above InitializeNode::InitializeNode,
+// here are the old stores to be captured:
+// store1 = (StoreC init.Control init.Memory (+ oop 12) 1)
+// store2 = (StoreC init.Control store1 (+ oop 14) 2)
+//
+// Here is the changed code; note the extra edges on init:
+// alloc = (Allocate ...)
+// rawoop = alloc.RawAddress
+// rawstore1 = (StoreC alloc.Control alloc.Memory (+ rawoop 12) 1)
+// rawstore2 = (StoreC alloc.Control alloc.Memory (+ rawoop 14) 2)
+// init = (Initialize alloc.Control alloc.Memory rawoop
+// rawstore1 rawstore2)
+//
+Node* InitializeNode::capture_store(StoreNode* st, intptr_t start,
+ PhaseTransform* phase) {
+ assert(stores_are_sane(phase), "");
+
+ if (start < 0) return NULL;
+ assert(can_capture_store(st, phase) == start, "sanity");
+
+ Compile* C = phase->C;
+ int size_in_bytes = st->memory_size();
+ int i = captured_store_insertion_point(start, size_in_bytes, phase);
+ if (i == 0) return NULL; // bail out
+ Node* prev_mem = NULL; // raw memory for the captured store
+ if (i > 0) {
+ prev_mem = in(i); // there is a pre-existing store under this one
+ set_req(i, C->top()); // temporarily disconnect it
+ // See StoreNode::Ideal 'st->outcnt() == 1' for the reason to disconnect.
+ } else {
+ i = -i; // no pre-existing store
+ prev_mem = zero_memory(); // a slice of the newly allocated object
+ if (i > InitializeNode::RawStores && in(i-1) == prev_mem)
+ set_req(--i, C->top()); // reuse this edge; it has been folded away
+ else
+ ins_req(i, C->top()); // build a new edge
+ }
+ Node* new_st = st->clone();
+ new_st->set_req(MemNode::Control, in(Control));
+ new_st->set_req(MemNode::Memory, prev_mem);
+ new_st->set_req(MemNode::Address, make_raw_address(start, phase));
+ new_st = phase->transform(new_st);
+
+ // At this point, new_st might have swallowed a pre-existing store
+ // at the same offset, or perhaps new_st might have disappeared,
+ // if it redundantly stored the same value (or zero to fresh memory).
+
+ // In any case, wire it in:
+ set_req(i, new_st);
+
+ // The caller may now kill the old guy.
+ DEBUG_ONLY(Node* check_st = find_captured_store(start, size_in_bytes, phase));
+ assert(check_st == new_st || check_st == NULL, "must be findable");
+ assert(!is_complete(), "");
+ return new_st;
+}
+
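+// Record the constant 'con' in the byte image 'tiles' at offset 'st_off'.
+// The store size must be a power-of-two primitive size and the offset must
+// be size-aligned; otherwise return false.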
+static bool store_constant(jlong* tiles, int num_tiles,
+ intptr_t st_off, int st_size,
+ jlong con) {
+ if ((st_off & (st_size-1)) != 0)
+ return false; // strange store offset (assume size==2**N)
+ address addr = (address)tiles + st_off;
+ assert(st_off >= 0 && addr+st_size <= (address)&tiles[num_tiles], "oob");
+ switch (st_size) {
+ case sizeof(jbyte): *(jbyte*) addr = (jbyte) con; break;
+ case sizeof(jchar): *(jchar*) addr = (jchar) con; break;
+ case sizeof(jint): *(jint*) addr = (jint) con; break;
+ case sizeof(jlong): *(jlong*) addr = (jlong) con; break;
+ default: return false; // strange store size (detect size!=2**N here)
+ }
+ return true; // return success to caller
+}
+
+// Coalesce subword constants into int constants and possibly
+// into long constants. The goal, if the CPU permits,
+// is to initialize the object with a small number of 64-bit tiles.
+// Also, convert floating-point constants to bit patterns.
+// Non-constants are not relevant to this pass.
+//
+// In terms of the running example on InitializeNode::InitializeNode
+// and InitializeNode::capture_store, here is the transformation
+// of rawstore1 and rawstore2 into rawstore12:
+// alloc = (Allocate ...)
+// rawoop = alloc.RawAddress
+// tile12 = 0x00010002
+// rawstore12 = (StoreI alloc.Control alloc.Memory (+ rawoop 12) tile12)
+// init = (Initialize alloc.Control alloc.Memory rawoop rawstore12)
+//
+void
+InitializeNode::coalesce_subword_stores(intptr_t header_size,
+ Node* size_in_bytes,
+ PhaseGVN* phase) {
+ Compile* C = phase->C;
+
+ assert(stores_are_sane(phase), "");
+ // Note: After this pass, they are not completely sane,
+ // since there may be some overlaps.
+
+ int old_subword = 0, old_long = 0, new_int = 0, new_long = 0;
+
+ intptr_t ti_limit = (TrackedInitializationLimit * HeapWordSize);
+ intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, ti_limit);
+ size_limit = MIN2(size_limit, ti_limit);
+ size_limit = align_size_up(size_limit, BytesPerLong);
+ int num_tiles = size_limit / BytesPerLong;
+
+ // allocate space for the tile map:
+ const int small_len = DEBUG_ONLY(true ? 3 :) 30; // keep stack frames small
+ jlong tiles_buf[small_len];
+ Node* nodes_buf[small_len];
+ jlong inits_buf[small_len];
+ jlong* tiles = ((num_tiles <= small_len) ? &tiles_buf[0]
+ : NEW_RESOURCE_ARRAY(jlong, num_tiles));
+ Node** nodes = ((num_tiles <= small_len) ? &nodes_buf[0]
+ : NEW_RESOURCE_ARRAY(Node*, num_tiles));
+ jlong* inits = ((num_tiles <= small_len) ? &inits_buf[0]
+ : NEW_RESOURCE_ARRAY(jlong, num_tiles));
+ // tiles: exact bitwise model of all primitive constants
+ // nodes: last constant-storing node subsumed into the tiles model
+ // inits: which bytes (in each tile) are touched by any initializations
+
+ //// Pass A: Fill in the tile model with any relevant stores.
+
+ Copy::zero_to_bytes(tiles, sizeof(tiles[0]) * num_tiles);
+ Copy::zero_to_bytes(nodes, sizeof(nodes[0]) * num_tiles);
+ Copy::zero_to_bytes(inits, sizeof(inits[0]) * num_tiles);
+ Node* zmem = zero_memory(); // initially zero memory state
+ for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+
+ // Figure out the store's offset and constant value:
+ if (st_off < header_size) continue; //skip (ignore header)
+ if (st->in(MemNode::Memory) != zmem) continue; //skip (odd store chain)
+ int st_size = st->as_Store()->memory_size();
+ if (st_off + st_size > size_limit) break;
+
+ // Record which bytes are touched, whether by constant or not.
+ if (!store_constant(inits, num_tiles, st_off, st_size, (jlong) -1))
+ continue; // skip (strange store size)
+
+ const Type* val = phase->type(st->in(MemNode::ValueIn));
+ if (!val->singleton()) continue; //skip (non-con store)
+ BasicType type = val->basic_type();
+
+ jlong con = 0;
+ switch (type) {
+ case T_INT: con = val->is_int()->get_con(); break;
+ case T_LONG: con = val->is_long()->get_con(); break;
+ case T_FLOAT: con = jint_cast(val->getf()); break;
+ case T_DOUBLE: con = jlong_cast(val->getd()); break;
+ default: continue; //skip (odd store type)
+ }
+
+ if (type == T_LONG && Matcher::isSimpleConstant64(con) &&
+ st->Opcode() == Op_StoreL) {
+ continue; // This StoreL is already optimal.
+ }
+
+ // Store down the constant.
+ store_constant(tiles, num_tiles, st_off, st_size, con);
+
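+ // Index of the 64-bit tile that contains this store.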
+ intptr_t j = st_off >> LogBytesPerLong;
+
+ if (type == T_INT && st_size == BytesPerInt
+ && (st_off & BytesPerInt) == BytesPerInt) {
+ jlong lcon = tiles[j];
+ if (!Matcher::isSimpleConstant64(lcon) &&
+ st->Opcode() == Op_StoreI) {
+ // This StoreI is already optimal by itself.
+ jint* intcon = (jint*) &tiles[j];
+ intcon[1] = 0; // undo the store_constant()
+
+ // If the previous store is also optimal by itself, back up and
+ // undo the action of the previous loop iteration... if we can.
+ // But if we can't, just let the previous half take care of itself.
+ st = nodes[j];
+ st_off -= BytesPerInt;
+ con = intcon[0];
+ if (con != 0 && st != NULL && st->Opcode() == Op_StoreI) {
+ assert(st_off >= header_size, "still ignoring header");
+ assert(get_store_offset(st, phase) == st_off, "must be");
+ assert(in(i-1) == zmem, "must be");
+ DEBUG_ONLY(const Type* tcon = phase->type(st->in(MemNode::ValueIn)));
+ assert(con == tcon->is_int()->get_con(), "must be");
+ // Undo the effects of the previous loop trip, which swallowed st:
+ intcon[0] = 0; // undo store_constant()
+ set_req(i-1, st); // undo set_req(i, zmem)
+ nodes[j] = NULL; // undo nodes[j] = st
+ --old_subword; // undo ++old_subword
+ }
+ continue; // This StoreI is already optimal.
+ }
+ }
+
+ // This store is not needed.
+ set_req(i, zmem);
+ nodes[j] = st; // record for the moment
+ if (st_size < BytesPerLong) // something has changed
+ ++old_subword; // includes int/float, but who's counting...
+ else ++old_long;
+ }
+
+ if ((old_subword + old_long) == 0)
+ return; // nothing more to do
+
+ //// Pass B: Convert any non-zero tiles into optimal constant stores.
+ // Be sure to insert them before overlapping non-constant stores.
+ // (E.g., byte[] x = { 1,2,y,4 } => x[int 0] = 0x01020004, x[2]=y.)
+ for (int j = 0; j < num_tiles; j++) {
+ jlong con = tiles[j];
+ jlong init = inits[j];
+ if (con == 0) continue;
+ jint con0, con1; // split the constant, address-wise
+ jint init0, init1; // split the init map, address-wise
+ { union { jlong con; jint intcon[2]; } u;
+ u.con = con;
+ con0 = u.intcon[0];
+ con1 = u.intcon[1];
+ u.con = init;
+ init0 = u.intcon[0];
+ init1 = u.intcon[1];
+ }
+
+ Node* old = nodes[j];
+ assert(old != NULL, "need the prior store");
+ intptr_t offset = (j * BytesPerLong);
+
+ bool split = !Matcher::isSimpleConstant64(con);
+
+ if (offset < header_size) {
+ assert(offset + BytesPerInt >= header_size, "second int counts");
+ assert(*(jint*)&tiles[j] == 0, "junk in header");
+ split = true; // only the second word counts
+ // Example: int a[] = { 42 ... }
+ } else if (con0 == 0 && init0 == -1) {
+ split = true; // first word is covered by full inits
+ // Example: int a[] = { ... foo(), 42 ... }
+ } else if (con1 == 0 && init1 == -1) {
+ split = true; // second word is covered by full inits
+ // Example: int a[] = { ... 42, foo() ... }
+ }
+
+ // Here's a case where init0 is neither 0 nor -1:
+ // byte a[] = { ... 0,0,foo(),0, 0,0,0,42 ... }
+ // Assuming big-endian memory, init0, init1 are 0x0000FF00, 0x000000FF.
+ // In this case the tile is not split; it is (jlong)42.
+ // The big tile is stored down, and then the foo() value is inserted.
+ // (If there were foo(),foo() instead of foo(),0, init0 would be -1.)
+
+ Node* ctl = old->in(MemNode::Control);
+ Node* adr = make_raw_address(offset, phase);
+ const TypePtr* atp = TypeRawPtr::BOTTOM;
+
+ // One or two coalesced stores to plop down.
+ Node* st[2];
+ intptr_t off[2];
+ int nst = 0;
+ if (!split) {
+ ++new_long;
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->longcon(con), T_LONG);
+ } else {
+ // Omit either if it is a zero.
+ if (con0 != 0) {
+ ++new_int;
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->intcon(con0), T_INT);
+ }
+ if (con1 != 0) {
+ ++new_int;
+ offset += BytesPerInt;
+ adr = make_raw_address(offset, phase);
+ off[nst] = offset;
+ st[nst++] = StoreNode::make(C, ctl, zmem, adr, atp,
+ phase->intcon(con1), T_INT);
+ }
+ }
+
+ // Insert second store first, then the first before the second.
+ // Insert each one just before any overlapping non-constant stores.
+ while (nst > 0) {
+ Node* st1 = st[--nst];
+ C->copy_node_notes_to(st1, old);
+ st1 = phase->transform(st1);
+ offset = off[nst];
+ assert(offset >= header_size, "do not smash header");
+ int ins_idx = captured_store_insertion_point(offset, /*size:*/0, phase);
+ guarantee(ins_idx != 0, "must re-insert constant store");
+ if (ins_idx < 0) ins_idx = -ins_idx; // never overlap
+ if (ins_idx > InitializeNode::RawStores && in(ins_idx-1) == zmem)
+ set_req(--ins_idx, st1);
+ else
+ ins_req(ins_idx, st1);
+ }
+ }
+
+ if (PrintCompilation && WizardMode)
+ tty->print_cr("Changed %d/%d subword/long constants into %d/%d int/long",
+ old_subword, old_long, new_int, new_long);
+ if (C->log() != NULL)
+ C->log()->elem("comment that='%d/%d subword/long to %d/%d int/long'",
+ old_subword, old_long, new_int, new_long);
+
+ // Clean up any remaining occurrences of zmem:
+ remove_extra_zeroes();
+}
+
+// Explore forward from in(start) to find the first fully initialized
+// word, and return its offset. Skip groups of subword stores which
+// together initialize full words. If in(start) is itself part of a
+// fully initialized word, return the offset of in(start). If there
+// are no following full-word stores, or if something is fishy, return
+// a negative value.
+intptr_t InitializeNode::find_next_fullword_store(uint start, PhaseGVN* phase) {
+ int int_map = 0;
+ intptr_t int_map_off = 0;
+ const int FULL_MAP = right_n_bits(BytesPerInt); // the int_map we hope for
+
+ for (uint i = start, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) break; // return conservative answer
+
+ int st_size = st->as_Store()->memory_size();
+ if (st_size >= BytesPerInt && (st_off % BytesPerInt) == 0) {
+ return st_off; // we found a complete word init
+ }
+
+ // update the map:
+
+ intptr_t this_int_off = align_size_down(st_off, BytesPerInt);
+ if (this_int_off != int_map_off) {
+ // reset the map:
+ int_map = 0;
+ int_map_off = this_int_off;
+ }
+
+ int subword_off = st_off - this_int_off;
+ int_map |= right_n_bits(st_size) << subword_off;
+ if ((int_map & FULL_MAP) == FULL_MAP) {
+ return this_int_off; // we found a complete word init
+ }
+
+ // Did this store hit or cross the word boundary?
+ intptr_t next_int_off = align_size_down(st_off + st_size, BytesPerInt);
+ if (next_int_off == this_int_off + BytesPerInt) {
+ // We passed the current int, without fully initializing it.
+ int_map_off = next_int_off;
+ int_map >>= BytesPerInt;
+ } else if (next_int_off > this_int_off + BytesPerInt) {
+ // We passed the current and next int.
+ return this_int_off + BytesPerInt;
+ }
+ }
+
+ return -1;
+}
+
+
+// Called when the associated AllocateNode is expanded into CFG.
+// At this point, we may perform additional optimizations.
+// Linearize the stores by ascending offset, to make memory
+// activity as coherent as possible.
+Node* InitializeNode::complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
+ intptr_t header_size,
+ Node* size_in_bytes,
+ PhaseGVN* phase) {
+ assert(!is_complete(), "not already complete");
+ assert(stores_are_sane(phase), "");
+ assert(allocation() != NULL, "must be present");
+
+ remove_extra_zeroes();
+
+ if (ReduceFieldZeroing || ReduceBulkZeroing)
+ // reduce instruction count for common initialization patterns
+ coalesce_subword_stores(header_size, size_in_bytes, phase);
+
+ Node* zmem = zero_memory(); // initially zero memory state
+ Node* inits = zmem; // accumulating a linearized chain of inits
+ #ifdef ASSERT
+ intptr_t last_init_off = sizeof(oopDesc); // previous init offset
+ intptr_t last_init_end = sizeof(oopDesc); // previous init offset+size
+ intptr_t last_tile_end = sizeof(oopDesc); // previous tile offset+size
+ #endif
+ intptr_t zeroes_done = header_size;
+
+ bool do_zeroing = true; // we might give up if inits are very sparse
+ int big_init_gaps = 0; // how many large gaps have we seen?
+
+ if (ZeroTLAB) do_zeroing = false;
+ if (!ReduceFieldZeroing && !ReduceBulkZeroing) do_zeroing = false;
+
+ for (uint i = InitializeNode::RawStores, limit = req(); i < limit; i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0)
+ break; // unknown junk in the inits
+ if (st->in(MemNode::Memory) != zmem)
+ break; // complicated store chains somehow in list
+
+ int st_size = st->as_Store()->memory_size();
+ intptr_t next_init_off = st_off + st_size;
+
+ if (do_zeroing && zeroes_done < next_init_off) {
+ // See if this store needs a zero before it or under it.
+ intptr_t zeroes_needed = st_off;
+
+ if (st_size < BytesPerInt) {
+ // Look for subword stores which only partially initialize words.
+ // If we find some, we must lay down some word-level zeroes first,
+ // underneath the subword stores.
+ //
+ // Examples:
+ // byte[] a = { p,q,r,s } => a[0]=p,a[1]=q,a[2]=r,a[3]=s
+ // byte[] a = { x,y,0,0 } => a[0..3] = 0, a[0]=x,a[1]=y
+ // byte[] a = { 0,0,z,0 } => a[0..3] = 0, a[2]=z
+ //
+ // Note: coalesce_subword_stores may have already done this,
+ // if it was prompted by constant non-zero subword initializers.
+ // But this case can still arise with non-constant stores.
+
+ intptr_t next_full_store = find_next_fullword_store(i, phase);
+
+ // In the examples above:
+ // in(i) p q r s x y z
+ // st_off 12 13 14 15 12 13 14
+ // st_size 1 1 1 1 1 1 1
+ // next_full_s. 12 16 16 16 16 16 16
+ // z's_done 12 16 16 16 12 16 12
+ // z's_needed 12 16 16 16 16 16 16
+ // zsize 0 0 0 0 4 0 4
+ if (next_full_store < 0) {
+ // Conservative tack: Zero to end of current word.
+ zeroes_needed = align_size_up(zeroes_needed, BytesPerInt);
+ } else {
+ // Zero to beginning of next fully initialized word.
+ // Or, don't zero at all, if we are already in that word.
+ assert(next_full_store >= zeroes_needed, "must go forward");
+ assert((next_full_store & (BytesPerInt-1)) == 0, "even boundary");
+ zeroes_needed = next_full_store;
+ }
+ }
+
+ if (zeroes_needed > zeroes_done) {
+ intptr_t zsize = zeroes_needed - zeroes_done;
+ // Do some incremental zeroing on rawmem, in parallel with inits.
+ zeroes_done = align_size_down(zeroes_done, BytesPerInt);
+ rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+ zeroes_done, zeroes_needed,
+ phase);
+ zeroes_done = zeroes_needed;
+ if (zsize > Matcher::init_array_short_size && ++big_init_gaps > 2)
+ do_zeroing = false; // leave the hole, next time
+ }
+ }
+
+ // Collect the store and move on:
+ st->set_req(MemNode::Memory, inits);
+ inits = st; // put it on the linearized chain
+ set_req(i, zmem); // unhook from previous position
+
+ if (zeroes_done == st_off)
+ zeroes_done = next_init_off;
+
+ assert(!do_zeroing || zeroes_done >= next_init_off, "don't miss any");
+
+ #ifdef ASSERT
+ // Various order invariants. Weaker than stores_are_sane because
+ // a large constant tile can be filled in by smaller non-constant stores.
+ assert(st_off >= last_init_off, "inits do not reverse");
+ last_init_off = st_off;
+ const Type* val = NULL;
+ if (st_size >= BytesPerInt &&
+ (val = phase->type(st->in(MemNode::ValueIn)))->singleton() &&
+ (int)val->basic_type() < (int)T_OBJECT) {
+ assert(st_off >= last_tile_end, "tiles do not overlap");
+ assert(st_off >= last_init_end, "tiles do not overwrite inits");
+ last_tile_end = MAX2(last_tile_end, next_init_off);
+ } else {
+ intptr_t st_tile_end = align_size_up(next_init_off, BytesPerLong);
+ assert(st_tile_end >= last_tile_end, "inits stay with tiles");
+ assert(st_off >= last_init_end, "inits do not overlap");
+ last_init_end = next_init_off; // it's a non-tile
+ }
+ #endif //ASSERT
+ }
+
+ remove_extra_zeroes(); // clear out all the zmems left over
+ add_req(inits);
+
+ if (!ZeroTLAB) {
+ // If anything remains to be zeroed, zero it all now.
+ zeroes_done = align_size_down(zeroes_done, BytesPerInt);
+ // if it is the last unused 4 bytes of an instance, forget about it
+ intptr_t size_limit = phase->find_intptr_t_con(size_in_bytes, max_jint);
+ if (zeroes_done + BytesPerLong >= size_limit) {
+ assert(allocation() != NULL, "");
+ Node* klass_node = allocation()->in(AllocateNode::KlassNode);
+ ciKlass* k = phase->type(klass_node)->is_klassptr()->klass();
+ if (zeroes_done == k->layout_helper())
+ zeroes_done = size_limit;
+ }
+ if (zeroes_done < size_limit) {
+ rawmem = ClearArrayNode::clear_memory(rawctl, rawmem, rawptr,
+ zeroes_done, size_in_bytes, phase);
+ }
+ }
+
+ set_complete(phase);
+ return rawmem;
+}
+
+
+#ifdef ASSERT
+bool InitializeNode::stores_are_sane(PhaseTransform* phase) {
+ if (is_complete())
+ return true; // stores could be anything at this point
+ intptr_t last_off = sizeof(oopDesc);
+ for (uint i = InitializeNode::RawStores; i < req(); i++) {
+ Node* st = in(i);
+ intptr_t st_off = get_store_offset(st, phase);
+ if (st_off < 0) continue; // ignore dead garbage
+ if (last_off > st_off) {
+ tty->print_cr("*** bad store offset at %d: %d > %d", i, last_off, st_off);
+ this->dump(2);
+ assert(false, "ascending store offsets");
+ return false;
+ }
+ last_off = st_off + st->as_Store()->memory_size();
+ }
+ return true;
+}
+#endif //ASSERT
+
+
+
+
+//============================MergeMemNode=====================================
+//
+// SEMANTICS OF MEMORY MERGES: A MergeMem is a memory state assembled from several
+// contributing store or call operations. Each contributor provides the memory
+// state for a particular "alias type" (see Compile::alias_type). For example,
+// if a MergeMem has an input X for alias category #6, then any memory reference
+// to alias category #6 may use X as its memory state input, as an exact equivalent
+// to using the MergeMem as a whole.
+// Load<6>( MergeMem(<6>: X, ...), p ) <==> Load<6>(X,p)
+//
+// (Here, the <N> notation gives the index of the relevant adr_type.)
+//
+// In one special case (and more cases in the future), alias categories overlap.
+// The special alias category "Bot" (Compile::AliasIdxBot) includes all memory
+// states. Therefore, if a MergeMem has only one contributing input W for Bot,
+// it is exactly equivalent to that state W:
+// MergeMem(<Bot>: W) <==> W
+//
+// Usually, the merge has more than one input. In that case, where inputs
+// overlap (i.e., one is Bot), the narrower alias type determines the memory
+// state for that type, and the wider alias type (Bot) fills in everywhere else:
+// Load<5>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<5>(W,p)
+// Load<6>( MergeMem(<Bot>: W, <6>: X), p ) <==> Load<6>(X,p)
+//
+// A merge can take a "wide" memory state as one of its narrow inputs.
+// This simply means that the merge observes only the relevant parts of
+// the wide input. That is, wide memory states arriving at narrow merge inputs
+// are implicitly "filtered" or "sliced" as necessary. (This is rare.)
+//
+// These rules imply that MergeMem nodes may cascade (via their <Bot> links),
+// and that memory slices "leak through":
+// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y)) <==> MergeMem(<Bot>: W, <7>: Y)
+//
+// But, in such a cascade, repeated memory slices can "block the leak":
+// MergeMem(<Bot>: MergeMem(<Bot>: W, <7>: Y), <7>: Y') <==> MergeMem(<Bot>: W, <7>: Y')
+//
+// In the last example, Y is not part of the combined memory state of the
+// outermost MergeMem. The system must, of course, prevent unschedulable
+// memory states from arising, so you can be sure that the state Y is somehow
+// a precursor to state Y'.
+//
+//
+// REPRESENTATION OF MEMORY MERGES: The indexes used to address the Node::in array
+// of each MergeMemNode array are exactly the numerical alias indexes, including
+// but not limited to AliasIdxTop, AliasIdxBot, and AliasIdxRaw. The functions
+// Compile::alias_type (and kin) produce and manage these indexes.
+//
+// By convention, the value of in(AliasIdxTop) (i.e., in(1)) is always the top node.
+// (Note that this provides quick access to the top node inside MergeMem methods,
+// without the need to reach out via TLS to Compile::current.)
+//
+// As a consequence of what was just described, a MergeMem that represents a full
+// memory state has an edge in(AliasIdxBot) which is a "wide" memory state,
+// containing all alias categories.
+//
+// MergeMem nodes never (?) have control inputs, so in(0) is NULL.
+//
+// All other edges in(N) (including in(AliasIdxRaw), which is in(3)) are either
+// a memory state for the alias type <N>, or else the top node, meaning that
+// there is no particular input for that alias type. Note that the length of
+// a MergeMem is variable, and may be extended at any time to accommodate new
+// memory states at larger alias indexes. When merges grow, they are of course
+// filled with "top" in the unused in() positions.
+//
+// This use of top is named "empty_memory()", or "empty_mem" (no-memory) as a variable.
+// (Top was chosen because it works smoothly with passes like GCM.)
+//
+// For convenience, we hardwire the alias index for TypeRawPtr::BOTTOM. (It is
+// the type of random VM bits like TLS references.) Since it is always the
+// first non-Bot memory slice, some low-level loops use it to initialize an
+// index variable: for (i = AliasIdxRaw; i < req(); i++).
+//
+//
+// ACCESSORS: There is a special accessor MergeMemNode::base_memory which returns
+// the distinguished "wide" state. The accessor MergeMemNode::memory_at(N) returns
+// the memory state for alias type <N> or, if there is no particular slice at <N>,
+// the base memory.  To prevent bugs, memory_at does not accept <Top>
+// or <Bot> indexes. The iterator MergeMemStream provides robust iteration over
+// MergeMem nodes or pairs of such nodes, ensuring that the non-top edges are visited.
+//
+// %%%% We may get rid of base_memory as a separate accessor at some point; it isn't
+// really that different from the other memory inputs. An abbreviation called
+// "bot_memory()" for "memory_at(AliasIdxBot)" would keep code tidy.
+//
+//
+// PARTIAL MEMORY STATES: During optimization, MergeMem nodes may arise that represent
+// partial memory states. When a Phi splits through a MergeMem, the copy of the Phi
+// that "emerges though" the base memory will be marked as excluding the alias types
+// of the other (narrow-memory) copies which "emerged through" the narrow edges:
+//
+// Phi<Bot>(U, MergeMem(<Bot>: W, <8>: Y))
+// ==Ideal=> MergeMem(<Bot>: Phi<Bot-8>(U, W), Phi<8>(U, Y))
+//
+// This strange "subtraction" effect is necessary to ensure IGVN convergence.
+// (It is currently unimplemented.) As you can see, the resulting merge is
+// actually a disjoint union of memory states, rather than an overlay.
+//
+
+//------------------------------MergeMemNode-----------------------------------
+Node* MergeMemNode::make_empty_memory() {
+ Node* empty_memory = (Node*) Compile::current()->top();
+ assert(empty_memory->is_top(), "correct sentinel identity");
+ return empty_memory;
+}
+
+MergeMemNode::MergeMemNode(Node *new_base) : Node(1+Compile::AliasIdxRaw) {
+ init_class_id(Class_MergeMem);
+ // all inputs are nullified in Node::Node(int)
+ // set_input(0, NULL); // no control input
+
+ // Initialize the edges uniformly to top, for starters.
+ Node* empty_mem = make_empty_memory();
+ for (uint i = Compile::AliasIdxTop; i < req(); i++) {
+ init_req(i,empty_mem);
+ }
+ assert(empty_memory() == empty_mem, "");
+
+ if( new_base != NULL && new_base->is_MergeMem() ) {
+ MergeMemNode* mdef = new_base->as_MergeMem();
+ assert(mdef->empty_memory() == empty_mem, "consistent sentinels");
+ for (MergeMemStream mms(this, mdef); mms.next_non_empty2(); ) {
+ mms.set_memory(mms.memory2());
+ }
+ assert(base_memory() == mdef->base_memory(), "");
+ } else {
+ set_base_memory(new_base);
+ }
+}
+
+// Make a new, untransformed MergeMem with the same base as 'mem'.
+// If mem is itself a MergeMem, populate the result with the same edges.
+MergeMemNode* MergeMemNode::make(Compile* C, Node* mem) {
+ return new(C, 1+Compile::AliasIdxRaw) MergeMemNode(mem);
+}
+
+//------------------------------cmp--------------------------------------------
+uint MergeMemNode::hash() const { return NO_HASH; }
+uint MergeMemNode::cmp( const Node &n ) const {
+ return (&n == this); // Always fail except on self
+}
+
+//------------------------------Identity---------------------------------------
+Node* MergeMemNode::Identity(PhaseTransform *phase) {
+ // Identity if this merge point does not record any interesting memory
+ // disambiguations.
+ Node* base_mem = base_memory();
+ Node* empty_mem = empty_memory();
+ if (base_mem != empty_mem) { // Memory path is not dead?
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ Node* mem = in(i);
+ if (mem != empty_mem && mem != base_mem) {
+ return this; // Many memory splits; no change
+ }
+ }
+ }
+ return base_mem; // No memory splits; ID on the one true input
+}
+
+//------------------------------Ideal------------------------------------------
+// This method is invoked recursively on chains of MergeMem nodes
+Node *MergeMemNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Remove chain'd MergeMems
+ //
+ // This is delicate, because each "in(i)" (i >= Raw) is interpreted
+ // relative to the "in(Bot)". Since we are patching both at the same time,
+ // we have to be careful to read each "in(i)" relative to the old "in(Bot)",
+ // but rewrite each "in(i)" relative to the new "in(Bot)".
+ Node *progress = NULL;
+
+
+ Node* old_base = base_memory();
+ Node* empty_mem = empty_memory();
+ if (old_base == empty_mem)
+ return NULL; // Dead memory path.
+
+ MergeMemNode* old_mbase;
+ if (old_base != NULL && old_base->is_MergeMem())
+ old_mbase = old_base->as_MergeMem();
+ else
+ old_mbase = NULL;
+ Node* new_base = old_base;
+
+ // simplify stacked MergeMems in base memory
+ if (old_mbase) new_base = old_mbase->base_memory();
+
+ // the base memory might contribute new slices beyond my req()
+ if (old_mbase) grow_to_match(old_mbase);
+
+ // Look carefully at the base node if it is a phi.
+ PhiNode* phi_base;
+ if (new_base != NULL && new_base->is_Phi())
+ phi_base = new_base->as_Phi();
+ else
+ phi_base = NULL;
+
+ Node* phi_reg = NULL;
+ uint phi_len = (uint)-1;
+ if (phi_base != NULL && !phi_base->is_copy()) {
+ // do not examine phi if degraded to a copy
+ phi_reg = phi_base->region();
+ phi_len = phi_base->req();
+ // see if the phi is unfinished
+ for (uint i = 1; i < phi_len; i++) {
+ if (phi_base->in(i) == NULL) {
+ // incomplete phi; do not look at it yet!
+ phi_reg = NULL;
+ phi_len = (uint)-1;
+ break;
+ }
+ }
+ }
+
+ // Note: We do not call verify_sparse on entry, because inputs
+ // can normalize to the base_memory via subsume_node or similar
+ // mechanisms. This method repairs that damage.
+
+ assert(!old_mbase || old_mbase->is_empty_memory(empty_mem), "consistent sentinels");
+
+ // Look at each slice.
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ Node* old_in = in(i);
+ // calculate the old memory value
+ Node* old_mem = old_in;
+ if (old_mem == empty_mem) old_mem = old_base;
+ assert(old_mem == memory_at(i), "");
+
+ // maybe update (reslice) the old memory value
+
+ // simplify stacked MergeMems
+ Node* new_mem = old_mem;
+ MergeMemNode* old_mmem;
+ if (old_mem != NULL && old_mem->is_MergeMem())
+ old_mmem = old_mem->as_MergeMem();
+ else
+ old_mmem = NULL;
+ if (old_mmem == this) {
+ // This can happen if loops break up and safepoints disappear.
+ // A merge of BotPtr (default) with a RawPtr memory derived from a
+ // safepoint can be rewritten to a merge of the same BotPtr with
+ // the BotPtr phi coming into the loop. If that phi disappears
+ // also, we can end up with a self-loop of the mergemem.
+ // In general, if loops degenerate and memory effects disappear,
+ // a mergemem can be left looking at itself. This simply means
+ // that the mergemem's default should be used, since there is
+ // no longer any apparent effect on this slice.
+ // Note: If a memory slice is a MergeMem cycle, it is unreachable
+ // from start. Update the input to TOP.
+ new_mem = (new_base == this || new_base == empty_mem)? empty_mem : new_base;
+ }
+ else if (old_mmem != NULL) {
+ new_mem = old_mmem->memory_at(i);
+ }
+ // else preceding memory was not a MergeMem
+
+ // replace equivalent phis (unfortunately, they do not GVN together)
+ if (new_mem != NULL && new_mem != new_base &&
+ new_mem->req() == phi_len && new_mem->in(0) == phi_reg) {
+ if (new_mem->is_Phi()) {
+ PhiNode* phi_mem = new_mem->as_Phi();
+ for (uint i = 1; i < phi_len; i++) {
+ if (phi_base->in(i) != phi_mem->in(i)) {
+ phi_mem = NULL;
+ break;
+ }
+ }
+ if (phi_mem != NULL) {
+ // equivalent phi nodes; revert to the def
+ new_mem = new_base;
+ }
+ }
+ }
+
+ // maybe store down a new value
+ Node* new_in = new_mem;
+ if (new_in == new_base) new_in = empty_mem;
+
+ if (new_in != old_in) {
+ // Warning: Do not combine this "if" with the previous "if"
+ // A memory slice might have to be rewritten even if it is semantically
+ // unchanged, if the base_memory value has changed.
+ set_req(i, new_in);
+ progress = this; // Report progress
+ }
+ }
+
+ if (new_base != old_base) {
+ set_req(Compile::AliasIdxBot, new_base);
+ // Don't use set_base_memory(new_base), because we need to update du.
+ assert(base_memory() == new_base, "");
+ progress = this;
+ }
+
+ if( base_memory() == this ) {
+ // a self cycle indicates this memory path is dead
+ set_req(Compile::AliasIdxBot, empty_mem);
+ }
+
+ // Resolve external cycles by calling Ideal on a MergeMem base_memory
+ // Recursion must occur after the self cycle check above
+ if( base_memory()->is_MergeMem() ) {
+ MergeMemNode *new_mbase = base_memory()->as_MergeMem();
+ Node *m = phase->transform(new_mbase); // Rollup any cycles
+ if( m != NULL && (m->is_top() ||
+ m->is_MergeMem() && m->as_MergeMem()->base_memory() == empty_mem) ) {
+ // propagate rollup of dead cycle to self
+ set_req(Compile::AliasIdxBot, empty_mem);
+ }
+ }
+
+ if( base_memory() == empty_mem ) {
+ progress = this;
+ // Cut inputs during Parse phase only.
+ // During Optimize phase a dead MergeMem node will be subsumed by Top.
+ if( !can_reshape ) {
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ if( in(i) != empty_mem ) { set_req(i, empty_mem); }
+ }
+ }
+ }
+
+ if( !progress && base_memory()->is_Phi() && can_reshape ) {
+ // Check if PhiNode::Ideal's "Split phis through memory merges"
+ // transform should be attempted. Look for this->phi->this cycle.
+ uint merge_width = req();
+ if (merge_width > Compile::AliasIdxRaw) {
+ PhiNode* phi = base_memory()->as_Phi();
+ for( uint i = 1; i < phi->req(); ++i ) {// For all paths in
+ if (phi->in(i) == this) {
+ phase->is_IterGVN()->_worklist.push(phi);
+ break;
+ }
+ }
+ }
+ }
+
+ assert(verify_sparse(), "please, no dups of base");
+ return progress;
+}
+
+//-------------------------set_base_memory-------------------------------------
+void MergeMemNode::set_base_memory(Node *new_base) {
+ Node* empty_mem = empty_memory();
+ set_req(Compile::AliasIdxBot, new_base);
+ assert(memory_at(req()) == new_base, "must set default memory");
+ // Clear out other occurrences of new_base:
+ if (new_base != empty_mem) {
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ if (in(i) == new_base) set_req(i, empty_mem);
+ }
+ }
+}
+
+//------------------------------out_RegMask------------------------------------
+const RegMask &MergeMemNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------dump_spec--------------------------------------
+#ifndef PRODUCT
+void MergeMemNode::dump_spec(outputStream *st) const {
+ st->print(" {");
+ Node* base_mem = base_memory();
+ for( uint i = Compile::AliasIdxRaw; i < req(); i++ ) {
+ Node* mem = memory_at(i);
+ if (mem == base_mem) { st->print(" -"); continue; }
+ st->print( " N%d:", mem->_idx );
+ Compile::current()->get_adr_type(i)->dump_on(st);
+ }
+ st->print(" }");
+}
+#endif // !PRODUCT
+
+
+#ifdef ASSERT
+static bool might_be_same(Node* a, Node* b) {
+ if (a == b) return true;
+ if (!(a->is_Phi() || b->is_Phi())) return false;
+ // phis shift around during optimization
+ return true; // pretty stupid...
+}
+
+// verify a narrow slice (either incoming or outgoing)
+static void verify_memory_slice(const MergeMemNode* m, int alias_idx, Node* n) {
+ if (!VerifyAliases) return; // don't bother to verify unless requested
+ if (is_error_reported()) return; // muzzle asserts when debugging an error
+ if (Node::in_dump()) return; // muzzle asserts when printing
+ assert(alias_idx >= Compile::AliasIdxRaw, "must not disturb base_memory or sentinel");
+ assert(n != NULL, "");
+ // Elide intervening MergeMem's
+ while (n->is_MergeMem()) {
+ n = n->as_MergeMem()->memory_at(alias_idx);
+ }
+ Compile* C = Compile::current();
+ const TypePtr* n_adr_type = n->adr_type();
+ if (n == m->empty_memory()) {
+ // Implicit copy of base_memory()
+ } else if (n_adr_type != TypePtr::BOTTOM) {
+ assert(n_adr_type != NULL, "new memory must have a well-defined adr_type");
+ assert(C->must_alias(n_adr_type, alias_idx), "new memory must match selected slice");
+ } else {
+ // A few places like make_runtime_call "know" that VM calls are narrow,
+ // and can be used to update only the VM bits stored as TypeRawPtr::BOTTOM.
+ bool expected_wide_mem = false;
+ if (n == m->base_memory()) {
+ expected_wide_mem = true;
+ } else if (alias_idx == Compile::AliasIdxRaw ||
+ n == m->memory_at(Compile::AliasIdxRaw)) {
+ expected_wide_mem = true;
+ } else if (!C->alias_type(alias_idx)->is_rewritable()) {
+ // memory can "leak through" calls on channels that
+ // are write-once. Allow this also.
+ expected_wide_mem = true;
+ }
+ assert(expected_wide_mem, "expected narrow slice replacement");
+ }
+}
+#else // !ASSERT
+#define verify_memory_slice(m,i,n) (0) // PRODUCT version is no-op
+#endif
+
+
+//-----------------------------memory_at---------------------------------------
+Node* MergeMemNode::memory_at(uint alias_idx) const {
+ assert(alias_idx >= Compile::AliasIdxRaw ||
+ alias_idx == Compile::AliasIdxBot && Compile::current()->AliasLevel() == 0,
+ "must avoid base_memory and AliasIdxTop");
+
+ // Otherwise, it is a narrow slice.
+ Node* n = alias_idx < req() ? in(alias_idx) : empty_memory();
+ Compile *C = Compile::current();
+ if (is_empty_memory(n)) {
+ // the array is sparse; empty slots are the "top" node
+ n = base_memory();
+ assert(Node::in_dump()
+ || n == NULL || n->bottom_type() == Type::TOP
+ || n->adr_type() == TypePtr::BOTTOM
+ || n->adr_type() == TypeRawPtr::BOTTOM
+ || Compile::current()->AliasLevel() == 0,
+ "must be a wide memory");
+ // AliasLevel == 0 if we are organizing the memory states manually.
+ // See verify_memory_slice for comments on TypeRawPtr::BOTTOM.
+ } else {
+ // make sure the stored slice is sane
+ #ifdef ASSERT
+ if (is_error_reported() || Node::in_dump()) {
+ } else if (might_be_same(n, base_memory())) {
+ // Give it a pass: It is a mostly harmless repetition of the base.
+ // This can arise normally from node subsumption during optimization.
+ } else {
+ verify_memory_slice(this, alias_idx, n);
+ }
+ #endif
+ }
+ return n;
+}
+
+//---------------------------set_memory_at-------------------------------------
+void MergeMemNode::set_memory_at(uint alias_idx, Node *n) {
+ verify_memory_slice(this, alias_idx, n);
+ Node* empty_mem = empty_memory();
+ if (n == base_memory()) n = empty_mem; // collapse default
+ uint need_req = alias_idx+1;
+ if (req() < need_req) {
+ if (n == empty_mem) return; // already the default, so do not grow me
+ // grow the sparse array
+ do {
+ add_req(empty_mem);
+ } while (req() < need_req);
+ }
+ set_req( alias_idx, n );
+}
+
+
+
+//--------------------------iteration_setup------------------------------------
+void MergeMemNode::iteration_setup(const MergeMemNode* other) {
+ if (other != NULL) {
+ grow_to_match(other);
+ // invariant: the finite support of mm2 is within mm->req()
+ #ifdef ASSERT
+ for (uint i = req(); i < other->req(); i++) {
+ assert(other->is_empty_memory(other->in(i)), "slice left uncovered");
+ }
+ #endif
+ }
+ // Replace spurious copies of base_memory by top.
+ Node* base_mem = base_memory();
+ if (base_mem != NULL && !base_mem->is_top()) {
+ for (uint i = Compile::AliasIdxBot+1, imax = req(); i < imax; i++) {
+ if (in(i) == base_mem)
+ set_req(i, empty_memory());
+ }
+ }
+}
+
+//---------------------------grow_to_match-------------------------------------
+void MergeMemNode::grow_to_match(const MergeMemNode* other) {
+ Node* empty_mem = empty_memory();
+ assert(other->is_empty_memory(empty_mem), "consistent sentinels");
+ // look for the finite support of the other memory
+ for (uint i = other->req(); --i >= req(); ) {
+ if (other->in(i) != empty_mem) {
+ uint new_len = i+1;
+ while (req() < new_len) add_req(empty_mem);
+ break;
+ }
+ }
+}
+
+//---------------------------verify_sparse-------------------------------------
+#ifndef PRODUCT
+bool MergeMemNode::verify_sparse() const {
+ assert(is_empty_memory(make_empty_memory()), "sane sentinel");
+ Node* base_mem = base_memory();
+ // The following can happen in degenerate cases, since empty==top.
+ if (is_empty_memory(base_mem)) return true;
+ for (uint i = Compile::AliasIdxRaw; i < req(); i++) {
+ assert(in(i) != NULL, "sane slice");
+ if (in(i) == base_mem) return false; // should have been the sentinel value!
+ }
+ return true;
+}
+
+bool MergeMemStream::match_memory(Node* mem, const MergeMemNode* mm, int idx) {
+ Node* n;
+ n = mm->in(idx);
+ if (mem == n) return true; // might be empty_memory()
+ n = (idx == Compile::AliasIdxBot)? mm->base_memory(): mm->memory_at(idx);
+ if (mem == n) return true;
+ while (n->is_Phi() && (n = n->as_Phi()->is_copy()) != NULL) {
+ if (mem == n) return true;
+ if (n == NULL) break;
+ }
+ return false;
+}
+#endif // !PRODUCT
diff --git a/src/share/vm/opto/memnode.hpp b/src/share/vm/opto/memnode.hpp
new file mode 100644
index 000000000..989e255a9
--- /dev/null
+++ b/src/share/vm/opto/memnode.hpp
@@ -0,0 +1,1062 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class MultiNode;
+class PhaseCCP;
+class PhaseTransform;
+
+//------------------------------MemNode----------------------------------------
+// Load or Store, possibly throwing a NULL pointer exception
+class MemNode : public Node {
+protected:
+#ifdef ASSERT
+ const TypePtr* _adr_type; // What kind of memory is being addressed?
+#endif
+ virtual uint size_of() const; // Size is bigger (ASSERT only)
+public:
+ enum { Control, // When is it safe to do this load?
+ Memory, // Chunk of memory is being loaded from
+ Address, // Actually address, derived from base
+ ValueIn, // Value to store
+ OopStore // Preceding oop store, only in StoreCM
+ };
+protected:
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at )
+ : Node(c0,c1,c2 ) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3 )
+ : Node(c0,c1,c2,c3) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+ MemNode( Node *c0, Node *c1, Node *c2, const TypePtr* at, Node *c3, Node *c4)
+ : Node(c0,c1,c2,c3,c4) {
+ init_class_id(Class_Mem);
+ debug_only(_adr_type=at; adr_type();)
+ }
+
+ // Helpers for the optimizer. Documented in memnode.cpp.
+ static bool detect_ptr_independence(Node* p1, AllocateNode* a1,
+ Node* p2, AllocateNode* a2,
+ PhaseTransform* phase);
+ static bool adr_phi_is_loop_invariant(Node* adr_phi, Node* cast);
+
+public:
+ // This one should probably be a phase-specific function:
+ static bool detect_dominating_control(Node* dom, Node* sub);
+
+ // Is this Node a MemNode or some descendant? Default is YES.
+ virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
+
+ virtual const class TypePtr *adr_type() const; // returns bottom_type of address
+
+ // Shared code for Ideal methods:
+ Node *Ideal_common(PhaseGVN *phase, bool can_reshape); // Return -1 for short-circuit NULL.
+
+ // Helper function for adr_type() implementations.
+ static const TypePtr* calculate_adr_type(const Type* t, const TypePtr* cross_check = NULL);
+
+ // Raw access function, to allow copying of adr_type efficiently in
+ // product builds and retain the debug info for debug builds.
+ const TypePtr *raw_adr_type() const {
+#ifdef ASSERT
+ return _adr_type;
+#else
+ return 0;
+#endif
+ }
+
+ // Map a load or store opcode to its corresponding store opcode.
+ // (Return -1 if unknown.)
+ virtual int store_Opcode() const { return -1; }
+
+ // What is the type of the value in memory? (T_VOID means "unspecified".)
+ virtual BasicType memory_type() const = 0;
+ virtual int memory_size() const { return type2aelembytes[memory_type()]; }
+
+ // Search through memory states which precede this node (load or store).
+ // Look for an exact match for the address, with no intervening
+ // aliased stores.
+ Node* find_previous_store(PhaseTransform* phase);
+
+ // Can this node (load or store) accurately see a stored value in
+ // the given memory state? (The state may or may not be in(Memory).)
+ Node* can_see_stored_value(Node* st, PhaseTransform* phase) const;
+
+#ifndef PRODUCT
+ static void dump_adr_type(const Node* mem, const TypePtr* adr_type, outputStream *st);
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------LoadNode---------------------------------------
+// Load value; requires Memory and Address
+class LoadNode : public MemNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+ const Type* const _type; // What kind of value is loaded?
+public:
+
+ LoadNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt )
+ : MemNode(c,mem,adr,at), _type(rt) {
+ init_class_id(Class_Load);
+ }
+
+ // Polymorphic factory method:
+ static LoadNode* make( Compile *C, Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *rt, BasicType bt );
+
+ virtual uint hash() const; // Check the type
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for Load of a Store.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // If the load is from Field memory and the pointer is non-null, we can
+ // zero out the control input.
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ virtual uint ideal_reg() const;
+ virtual const Type *bottom_type() const;
+ // Following method is copied from TypeNode:
+ void set_type(const Type* t) {
+ assert(t != NULL, "sanity");
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ *(const Type**)&_type = t; // cast away const-ness
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code");
+ }
+ const Type* type() const { assert(_type != NULL, "sanity"); return _type; };
+
+ // Do not match memory edge
+ virtual uint match_edge(uint idx) const;
+
+ // Map a load opcode to its corresponding store opcode.
+ virtual int store_Opcode() const = 0;
+
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+protected:
+ const Type* load_array_final_field(const TypeKlassPtr *tkls,
+ ciKlass* klass) const;
+};
+
+//------------------------------LoadBNode--------------------------------------
+// Load a byte (8bits signed) from memory
+class LoadBNode : public LoadNode {
+public:
+ LoadBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreB; }
+ virtual BasicType memory_type() const { return T_BYTE; }
+};
+
+//------------------------------LoadCNode--------------------------------------
+// Load a char (16bits unsigned) from memory
+class LoadCNode : public LoadNode {
+public:
+ LoadCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreC; }
+ virtual BasicType memory_type() const { return T_CHAR; }
+};
+
+//------------------------------LoadINode--------------------------------------
+// Load an integer from memory
+class LoadINode : public LoadNode {
+public:
+ LoadINode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual int store_Opcode() const { return Op_StoreI; }
+ virtual BasicType memory_type() const { return T_INT; }
+};
+
+//------------------------------LoadRangeNode----------------------------------
+// Load an array length from the array
+class LoadRangeNode : public LoadINode {
+public:
+ LoadRangeNode( Node *c, Node *mem, Node *adr, const TypeInt *ti = TypeInt::POS )
+ : LoadINode(c,mem,adr,TypeAryPtr::RANGE,ti) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+};
+
+//------------------------------LoadLNode--------------------------------------
+// Load a long from memory
+class LoadLNode : public LoadNode {
+ virtual uint hash() const { return LoadNode::hash() + _require_atomic_access; }
+ virtual uint cmp( const Node &n ) const {
+ return _require_atomic_access == ((LoadLNode&)n)._require_atomic_access
+ && LoadNode::cmp(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ const bool _require_atomic_access; // is piecewise load forbidden?
+
+public:
+ LoadLNode( Node *c, Node *mem, Node *adr, const TypePtr* at,
+ const TypeLong *tl = TypeLong::LONG,
+ bool require_atomic_access = false )
+ : LoadNode(c,mem,adr,at,tl)
+ , _require_atomic_access(require_atomic_access)
+ {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegL; }
+ virtual int store_Opcode() const { return Op_StoreL; }
+ virtual BasicType memory_type() const { return T_LONG; }
+ bool require_atomic_access() { return _require_atomic_access; }
+ static LoadLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, const Type* rt);
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {
+ LoadNode::dump_spec(st);
+ if (_require_atomic_access) st->print(" Atomic!");
+ }
+#endif
+};
+
+//------------------------------LoadL_unalignedNode----------------------------
+// Load a long from unaligned memory
+class LoadL_unalignedNode : public LoadLNode {
+public:
+ LoadL_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
+ : LoadLNode(c,mem,adr,at) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LoadFNode--------------------------------------
+// Load a float (32 bits) from memory
+class LoadFNode : public LoadNode {
+public:
+ LoadFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::FLOAT )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegF; }
+ virtual int store_Opcode() const { return Op_StoreF; }
+ virtual BasicType memory_type() const { return T_FLOAT; }
+};
+
+//------------------------------LoadDNode--------------------------------------
+// Load a double (64 bits) from memory
+class LoadDNode : public LoadNode {
+public:
+ LoadDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const Type *t = Type::DOUBLE )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual int store_Opcode() const { return Op_StoreD; }
+ virtual BasicType memory_type() const { return T_DOUBLE; }
+};
+
+//------------------------------LoadD_unalignedNode----------------------------
+// Load a double from unaligned memory
+class LoadD_unalignedNode : public LoadDNode {
+public:
+ LoadD_unalignedNode( Node *c, Node *mem, Node *adr, const TypePtr* at )
+ : LoadDNode(c,mem,adr,at) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LoadPNode--------------------------------------
+// Load a pointer from memory (either object or array)
+class LoadPNode : public LoadNode {
+public:
+ LoadPNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypePtr* t )
+ : LoadNode(c,mem,adr,at,t) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegP; }
+ virtual int store_Opcode() const { return Op_StoreP; }
+ virtual BasicType memory_type() const { return T_ADDRESS; }
+ // depends_only_on_test is almost always true, and needs to be almost always
+ // true to enable key hoisting & commoning optimizations. However, for the
+ // special case of RawPtr loads from TLS top & end, the control edge carries
+ // the dependence preventing hoisting past a Safepoint instead of the memory
+ // edge. (An unfortunate consequence of having Safepoints not set Raw
+ // Memory; itself an unfortunate consequence of having Nodes which produce
+ // results (new raw memory state) inside of loops preventing all manner of
+ // other optimizations). Basically, it's ugly but so is the alternative.
+ // See comment in macro.cpp, around line 125 expand_allocate_common().
+ virtual bool depends_only_on_test() const { return adr_type() != TypeRawPtr::BOTTOM; }
+};
+
+//------------------------------LoadKlassNode----------------------------------
+// Load a Klass from an object
+class LoadKlassNode : public LoadPNode {
+public:
+ LoadKlassNode( Node *c, Node *mem, Node *adr, const TypePtr *at, const TypeKlassPtr *tk = TypeKlassPtr::OBJECT )
+ : LoadPNode(c,mem,adr,at,tk) {}
+ virtual int Opcode() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual bool depends_only_on_test() const { return true; }
+};
+
+//------------------------------LoadSNode--------------------------------------
+// Load a short (16bits signed) from memory
+class LoadSNode : public LoadNode {
+public:
+ LoadSNode( Node *c, Node *mem, Node *adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT )
+ : LoadNode(c,mem,adr,at,ti) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual int store_Opcode() const { return Op_StoreC; }
+ virtual BasicType memory_type() const { return T_SHORT; }
+};
+
+//------------------------------StoreNode--------------------------------------
+// Store value; requires Memory, Address and Value
+class StoreNode : public MemNode {
+protected:
+ virtual uint cmp( const Node &n ) const;
+ virtual bool depends_only_on_test() const { return false; }
+
+ Node *Ideal_masked_input (PhaseGVN *phase, uint mask);
+ Node *Ideal_sign_extended_input(PhaseGVN *phase, int num_bits);
+
+public:
+ StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val )
+ : MemNode(c,mem,adr,at,val) {
+ init_class_id(Class_Store);
+ }
+ StoreNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store )
+ : MemNode(c,mem,adr,at,val,oop_store) {
+ init_class_id(Class_Store);
+ }
+
+ // Polymorphic factory method:
+ static StoreNode* make( Compile *C, Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, BasicType bt );
+
+ virtual uint hash() const; // Check the type
+
+ // If the store is to Field memory and the pointer is non-null, we can
+ // zero out the control input.
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual add() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Check for identity function on memory (Load then Store at same address)
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Do not match memory edge
+ virtual uint match_edge(uint idx) const;
+
+ virtual const Type *bottom_type() const; // returns Type::MEMORY
+
+ // Map a store opcode to its corresponding own opcode, trivially.
+ virtual int store_Opcode() const { return Opcode(); }
+
+ // have all possible loads of the value stored been optimized away?
+ bool value_never_loaded(PhaseTransform *phase) const;
+};
+
+//------------------------------StoreBNode-------------------------------------
+// Store byte to memory
+class StoreBNode : public StoreNode {
+public:
+ StoreBNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual BasicType memory_type() const { return T_BYTE; }
+};
+
+//------------------------------StoreCNode-------------------------------------
+// Store char/short to memory
+class StoreCNode : public StoreNode {
+public:
+ StoreCNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual BasicType memory_type() const { return T_CHAR; }
+};
+
+//------------------------------StoreINode-------------------------------------
+// Store int to memory
+class StoreINode : public StoreNode {
+public:
+ StoreINode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_INT; }
+};
+
+//------------------------------StoreLNode-------------------------------------
+// Store long to memory
+class StoreLNode : public StoreNode {
+ virtual uint hash() const { return StoreNode::hash() + _require_atomic_access; }
+ virtual uint cmp( const Node &n ) const {
+ return _require_atomic_access == ((StoreLNode&)n)._require_atomic_access
+ && StoreNode::cmp(n);
+ }
+ virtual uint size_of() const { return sizeof(*this); }
+ const bool _require_atomic_access; // is piecewise store forbidden?
+
+public:
+ StoreLNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val,
+ bool require_atomic_access = false )
+ : StoreNode(c,mem,adr,at,val)
+ , _require_atomic_access(require_atomic_access)
+ {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_LONG; }
+ bool require_atomic_access() { return _require_atomic_access; }
+ static StoreLNode* make_atomic(Compile *C, Node* ctl, Node* mem, Node* adr, const TypePtr* adr_type, Node* val);
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {
+ StoreNode::dump_spec(st);
+ if (_require_atomic_access) st->print(" Atomic!");
+ }
+#endif
+};
+
+//------------------------------StoreFNode-------------------------------------
+// Store float to memory
+class StoreFNode : public StoreNode {
+public:
+ StoreFNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_FLOAT; }
+};
+
+//------------------------------StoreDNode-------------------------------------
+// Store double to memory
+class StoreDNode : public StoreNode {
+public:
+ StoreDNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_DOUBLE; }
+};
+
+//------------------------------StorePNode-------------------------------------
+// Store pointer to memory
+class StorePNode : public StoreNode {
+public:
+ StorePNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val ) : StoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual BasicType memory_type() const { return T_ADDRESS; }
+};
+
+//------------------------------StoreCMNode-----------------------------------
+// Store card-mark byte to memory for CM
+// The last StoreCM before a SafePoint must be preserved and occur after its "oop" store
+// Preceding equivalent StoreCMs may be eliminated.
+class StoreCMNode : public StoreNode {
+public:
+ StoreCMNode( Node *c, Node *mem, Node *adr, const TypePtr* at, Node *val, Node *oop_store ) : StoreNode(c,mem,adr,at,val,oop_store) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual BasicType memory_type() const { return T_VOID; } // unspecific
+};
+
+//------------------------------LoadPLockedNode---------------------------------
+// Load-locked a pointer from memory (either object or array).
+// On Sparc & Intel this is implemented as a normal pointer load.
+// On PowerPC and friends it's a real load-locked.
+class LoadPLockedNode : public LoadPNode {
+public:
+ LoadPLockedNode( Node *c, Node *mem, Node *adr )
+ : LoadPNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeRawPtr::BOTTOM) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_StorePConditional; }
+ virtual bool depends_only_on_test() const { return true; }
+};
+
+//------------------------------LoadLLockedNode---------------------------------
+// Load-locked a long from memory.
+// On Sparc & Intel this is implemented as a normal long load.
+class LoadLLockedNode : public LoadLNode {
+public:
+ LoadLLockedNode( Node *c, Node *mem, Node *adr )
+ : LoadLNode(c,mem,adr,TypeRawPtr::BOTTOM, TypeLong::LONG) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_StoreLConditional; }
+};
+
+//------------------------------SCMemProjNode---------------------------------------
+// This class defines a projection of the memory state of a store conditional node.
+// These nodes return a value, but also update memory.
+class SCMemProjNode : public ProjNode {
+public:
+ enum {SCMEMPROJCON = (uint)-2};
+ SCMemProjNode( Node *src) : ProjNode( src, SCMEMPROJCON) { }
+ virtual int Opcode() const;
+ virtual bool is_CFG() const { return false; }
+ virtual const Type *bottom_type() const {return Type::MEMORY;}
+ virtual const TypePtr *adr_type() const { return in(0)->in(MemNode::Memory)->adr_type();}
+ virtual uint ideal_reg() const { return 0;} // memory projections don't have a register
+ virtual const Type *Value( PhaseTransform *phase ) const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const {};
+#endif
+};
+
+//------------------------------LoadStoreNode---------------------------
+class LoadStoreNode : public Node {
+public:
+ enum {
+ ExpectedIn = MemNode::ValueIn+1 // One more input than MemNode
+ };
+ LoadStoreNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex);
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual uint match_edge(uint idx) const { return idx == MemNode::Address || idx == MemNode::ValueIn; }
+};
+
+//------------------------------StorePConditionalNode---------------------------
+// Conditionally store pointer to memory, if no change since prior
+// load-locked. Sets flags for success or failure of the store.
+class StorePConditionalNode : public LoadStoreNode {
+public:
+ StorePConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+ virtual int Opcode() const;
+ // Produces flags
+ virtual uint ideal_reg() const { return Op_RegFlags; }
+};
+
+//------------------------------StoreLConditionalNode---------------------------
+// Conditionally store long to memory, if no change since prior
+// load-locked. Sets flags for success or failure of the store.
+class StoreLConditionalNode : public LoadStoreNode {
+public:
+ StoreLConditionalNode( Node *c, Node *mem, Node *adr, Node *val, Node *ll ) : LoadStoreNode(c, mem, adr, val, ll) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapLNode---------------------------
+class CompareAndSwapLNode : public LoadStoreNode {
+public:
+ CompareAndSwapLNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapINode---------------------------
+class CompareAndSwapINode : public LoadStoreNode {
+public:
+ CompareAndSwapINode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+
+//------------------------------CompareAndSwapPNode---------------------------
+class CompareAndSwapPNode : public LoadStoreNode {
+public:
+ CompareAndSwapPNode( Node *c, Node *mem, Node *adr, Node *val, Node *ex) : LoadStoreNode(c, mem, adr, val, ex) { }
+ virtual int Opcode() const;
+};
+
+//------------------------------ClearArray-------------------------------------
+class ClearArrayNode: public Node {
+public:
+ ClearArrayNode( Node *ctrl, Node *arymem, Node *word_cnt, Node *base ) : Node(ctrl,arymem,word_cnt,base) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::MEMORY; }
+ // ClearArray modifies array elements, and so affects only the
+ // array memory addressed by the bottom_type of its base address.
+ virtual const class TypePtr *adr_type() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint match_edge(uint idx) const;
+
+ // Clear the given area of an object or array.
+ // The start offset must always be aligned mod BytesPerInt.
+ // The end offset must always be aligned mod BytesPerLong.
+ // Return the new memory.
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ intptr_t start_offset,
+ intptr_t end_offset,
+ PhaseGVN* phase);
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ intptr_t start_offset,
+ Node* end_offset,
+ PhaseGVN* phase);
+ static Node* clear_memory(Node* control, Node* mem, Node* dest,
+ Node* start_offset,
+ Node* end_offset,
+ PhaseGVN* phase);
+};
+
+//------------------------------StrComp-------------------------------------
+class StrCompNode: public Node {
+public:
+ StrCompNode(Node *control,
+ Node* char_array_mem,
+ Node* value_mem,
+ Node* count_mem,
+ Node* offset_mem,
+ Node* s1, Node* s2): Node(control,
+ char_array_mem,
+ value_mem,
+ count_mem,
+ offset_mem,
+ s1, s2) {};
+ virtual int Opcode() const;
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type* bottom_type() const { return TypeInt::INT; }
+ // a StrCompNode (conservatively) aliases with everything:
+ virtual const TypePtr* adr_type() const { return TypePtr::BOTTOM; }
+ virtual uint match_edge(uint idx) const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------MemBar-----------------------------------------
+// There are different flavors of Memory Barriers to match the Java Memory
+// Model. Monitor-enter and volatile-load act as Acquires: no following ref
+// can be moved to before them. We insert a MemBar-Acquire after a FastLock or
+// volatile-load. Monitor-exit and volatile-store act as Releases: no
+// preceding ref can be moved to after them. We insert a MemBar-Release
+// before a FastUnlock or volatile-store. All volatiles need to be
+// serialized, so we follow each volatile-store with a MemBar-Volatile to
+// separate it from any following volatile-load.
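+//
+// Illustrative placement sketch (a restatement of the rules above, not code
+// generated anywhere in this file):
+//   volatile load    ==>  LoadX;          MemBarAcquire
+//   volatile store   ==>  MemBarRelease;  StoreX;  MemBarVolatile
+//   monitor-enter    ==>  FastLock;       MemBarAcquire
+//   monitor-exit     ==>  MemBarRelease;  FastUnlock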
+class MemBarNode: public MultiNode {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+
+ virtual uint size_of() const { return sizeof(*this); }
+ // Memory type this node is serializing. Usually either rawptr or bottom.
+ const TypePtr* _adr_type;
+
+public:
+ enum {
+ Precedent = TypeFunc::Parms // optional edge to force precedence
+ };
+ MemBarNode(Compile* C, int alias_idx, Node* precedent);
+ virtual int Opcode() const = 0;
+ virtual const class TypePtr *adr_type() const { return _adr_type; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint match_edge(uint idx) const { return 0; }
+ virtual const Type *bottom_type() const { return TypeTuple::MEMBAR; }
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ // Factory method. Builds a wide or narrow membar.
+ // Optional 'precedent' becomes an extra edge if not null.
+ static MemBarNode* make(Compile* C, int opcode,
+ int alias_idx = Compile::AliasIdxBot,
+ Node* precedent = NULL);
+};
+
+// "Acquire" - no following ref can move before (but earlier refs can
+// follow, like an early Load stalled in cache). Requires multi-cpu
+// visibility. Inserted after a volatile load or FastLock.
+class MemBarAcquireNode: public MemBarNode {
+public:
+ MemBarAcquireNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// "Release" - no earlier ref can move after (but later refs can move
+// up, like a speculative pipelined cache-hitting Load). Requires
+// multi-cpu visibility. Inserted before a volatile store or FastUnLock.
+class MemBarReleaseNode: public MemBarNode {
+public:
+ MemBarReleaseNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// Ordering between a volatile store and a following volatile load.
+// Requires multi-CPU visibility?
+class MemBarVolatileNode: public MemBarNode {
+public:
+ MemBarVolatileNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+};
+
+// Ordering within the same CPU. Used to order unsafe memory references
+// inside the compiler when we lack alias info. Not needed "outside" the
+// compiler because the CPU does all the ordering for us.
+class MemBarCPUOrderNode: public MemBarNode {
+public:
+ MemBarCPUOrderNode(Compile* C, int alias_idx, Node* precedent)
+ : MemBarNode(C, alias_idx, precedent) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return 0; } // not matched in the AD file
+};
+
+// Isolation of object setup after an AllocateNode and before next safepoint.
+// (See comment in memnode.cpp near InitializeNode::InitializeNode for semantics.)
+class InitializeNode: public MemBarNode {
+ friend class AllocateNode;
+
+ bool _is_complete;
+
+public:
+ enum {
+ Control = TypeFunc::Control,
+ Memory = TypeFunc::Memory, // MergeMem for states affected by this op
+ RawAddress = TypeFunc::Parms+0, // the newly-allocated raw address
+ RawStores = TypeFunc::Parms+1 // zero or more stores (or TOP)
+ };
+
+ InitializeNode(Compile* C, int adr_type, Node* rawoop);
+ virtual int Opcode() const;
+ virtual uint size_of() const { return sizeof(*this); }
+ virtual uint ideal_reg() const { return 0; } // not matched in the AD file
+ virtual const RegMask &in_RegMask(uint) const; // mask for RawAddress
+
+ // Manage incoming memory edges via a MergeMem on in(Memory):
+ Node* memory(uint alias_idx);
+
+ // The raw memory edge coming directly from the Allocation.
+ // The contents of this memory are *always* all-zero-bits.
+ Node* zero_memory() { return memory(Compile::AliasIdxRaw); }
+
+ // Return the corresponding allocation for this initialization (or null if none).
+ // (Note: Both InitializeNode::allocation and AllocateNode::initialization
+ // are defined in graphKit.cpp, which sets up the bidirectional relation.)
+ AllocateNode* allocation();
+
+ // Anything other than zeroing in this init?
+ bool is_non_zero();
+
+ // An InitializeNode must be completed before macro expansion is done.
+ // Completion requires that the AllocateNode must be followed by
+ // initialization of the new memory to zero, then to any initializers.
+ bool is_complete() { return _is_complete; }
+
+ // Mark complete. (Must not yet be complete.)
+ void set_complete(PhaseGVN* phase);
+
+#ifdef ASSERT
+ // ensure all non-degenerate stores are ordered and non-overlapping
+ bool stores_are_sane(PhaseTransform* phase);
+#endif //ASSERT
+
+ // See if this store can be captured; return offset where it initializes.
+ // Return 0 if the store cannot be moved (any sort of problem).
+ intptr_t can_capture_store(StoreNode* st, PhaseTransform* phase);
+
+ // Capture another store; reformat it to write my internal raw memory.
+ // Return the captured copy, else NULL if there is some sort of problem.
+ Node* capture_store(StoreNode* st, intptr_t start, PhaseTransform* phase);
+
+ // Find captured store which corresponds to the range [start..start+size).
+ // Return my own memory projection (meaning the initial zero bits)
+ // if there is no such store. Return NULL if there is a problem.
+ Node* find_captured_store(intptr_t start, int size_in_bytes, PhaseTransform* phase);
+
+ // Called when the associated AllocateNode is expanded into CFG.
+ Node* complete_stores(Node* rawctl, Node* rawmem, Node* rawptr,
+ intptr_t header_size, Node* size_in_bytes,
+ PhaseGVN* phase);
+
+ private:
+ void remove_extra_zeroes();
+
+ // Find out where a captured store should be placed (or already is placed).
+ int captured_store_insertion_point(intptr_t start, int size_in_bytes,
+ PhaseTransform* phase);
+
+ static intptr_t get_store_offset(Node* st, PhaseTransform* phase);
+
+ Node* make_raw_address(intptr_t offset, PhaseTransform* phase);
+
+ bool detect_init_independence(Node* n, bool st_is_pinned, int& count);
+
+ void coalesce_subword_stores(intptr_t header_size, Node* size_in_bytes,
+ PhaseGVN* phase);
+
+ intptr_t find_next_fullword_store(uint i, PhaseGVN* phase);
+};
+
+//------------------------------MergeMem---------------------------------------
+// (See comment in memnode.cpp near MergeMemNode::MergeMemNode for semantics.)
+class MergeMemNode: public Node {
+ virtual uint hash() const ; // { return NO_HASH; }
+ virtual uint cmp( const Node &n ) const ; // Always fail, except on self
+ friend class MergeMemStream;
+ MergeMemNode(Node* def); // clients use MergeMemNode::make
+
+public:
+ // If the input is a whole memory state, clone it with all its slices intact.
+ // Otherwise, make a new memory state with just that base memory input.
+ // In either case, the result is a newly created MergeMem.
+ static MergeMemNode* make(Compile* C, Node* base_memory);
+
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return 0; }
+ virtual const RegMask &out_RegMask() const;
+ virtual const Type *bottom_type() const { return Type::MEMORY; }
+ virtual const TypePtr *adr_type() const { return TypePtr::BOTTOM; }
+ // sparse accessors
+ // Fetch the previously stored "set_memory_at", or else the base memory.
+ // (Caller should clone it if it is a phi-nest.)
+ Node* memory_at(uint alias_idx) const;
+ // set the memory, regardless of its previous value
+ void set_memory_at(uint alias_idx, Node* n);
+ // the "base" is the memory that provides the non-finite support
+ Node* base_memory() const { return in(Compile::AliasIdxBot); }
+ // warning: setting the base can implicitly set any of the other slices too
+ void set_base_memory(Node* def);
+ // sentinel value which denotes a copy of the base memory:
+ Node* empty_memory() const { return in(Compile::AliasIdxTop); }
+ static Node* make_empty_memory(); // where the sentinel comes from
+ bool is_empty_memory(Node* n) const { assert((n == empty_memory()) == n->is_top(), "sanity"); return n->is_top(); }
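+ // Sketch of the sparse layout implied by the accessors above:
+ //   in(AliasIdxTop)        -- the empty-memory sentinel (always top)
+ //   in(AliasIdxBot)        -- base_memory(), the default "wide" memory
+ //   in(AliasIdxRaw) and up -- narrow slices; a top entry here means
+ //                             "same as base_memory()"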
+ // hook for the iterator, to perform any necessary setup
+ void iteration_setup(const MergeMemNode* other = NULL);
+ // push sentinels until I am at least as long as the other (semantic no-op)
+ void grow_to_match(const MergeMemNode* other);
+ bool verify_sparse() const PRODUCT_RETURN0;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+class MergeMemStream : public StackObj {
+ private:
+ MergeMemNode* _mm;
+ const MergeMemNode* _mm2; // optional second guy, contributes non-empty iterations
+ Node* _mm_base; // loop-invariant base memory of _mm
+ int _idx;
+ int _cnt;
+ Node* _mem;
+ Node* _mem2;
+ int _cnt2;
+
+ void init(MergeMemNode* mm, const MergeMemNode* mm2 = NULL) {
+ // subsume_node will break sparseness at times, whenever a memory slice
+ // folds down to a copy of the base ("fat") memory. In such a case,
+ // the raw edge will update to base, although it should be top.
+ // This iterator will recognize either top or base_memory as an
+ // "empty" slice. See is_empty, is_empty2, and next below.
+ //
+ // The sparseness property is repaired in MergeMemNode::Ideal.
+ // As long as access to a MergeMem goes through this iterator
+ // or the memory_at accessor, flaws in the sparseness will
+ // never be observed.
+ //
+ // Also, iteration_setup repairs sparseness.
+ assert(mm->verify_sparse(), "please, no dups of base");
+ assert(mm2==NULL || mm2->verify_sparse(), "please, no dups of base");
+
+ _mm = mm;
+ _mm_base = mm->base_memory();
+ _mm2 = mm2;
+ _cnt = mm->req();
+ _idx = Compile::AliasIdxBot-1; // start at the base memory
+ _mem = NULL;
+ _mem2 = NULL;
+ }
+
+#ifdef ASSERT
+ Node* check_memory() const {
+ if (at_base_memory())
+ return _mm->base_memory();
+ else if ((uint)_idx < _mm->req() && !_mm->in(_idx)->is_top())
+ return _mm->memory_at(_idx);
+ else
+ return _mm_base;
+ }
+ Node* check_memory2() const {
+ return at_base_memory()? _mm2->base_memory(): _mm2->memory_at(_idx);
+ }
+#endif
+
+ static bool match_memory(Node* mem, const MergeMemNode* mm, int idx) PRODUCT_RETURN0;
+ void assert_synch() const {
+ assert(!_mem || _idx >= _cnt || match_memory(_mem, _mm, _idx),
+ "no side-effects except through the stream");
+ }
+
+ public:
+
+ // expected usages:
+ // for (MergeMemStream mms(mem->is_MergeMem()); next_non_empty(); ) { ... }
+ // for (MergeMemStream mms(mem1, mem2); next_non_empty2(); ) { ... }
+
+ // iterate over one merge
+ MergeMemStream(MergeMemNode* mm) {
+ mm->iteration_setup();
+ init(mm);
+ debug_only(_cnt2 = 999);
+ }
+ // iterate in parallel over two merges
+ // only iterates through non-empty elements of mm2
+ MergeMemStream(MergeMemNode* mm, const MergeMemNode* mm2) {
+ assert(mm2, "second argument must be a MergeMem also");
+ ((MergeMemNode*)mm2)->iteration_setup(); // update hidden state
+ mm->iteration_setup(mm2);
+ init(mm, mm2);
+ _cnt2 = mm2->req();
+ }
+#ifdef ASSERT
+ ~MergeMemStream() {
+ assert_synch();
+ }
+#endif
+
+ MergeMemNode* all_memory() const {
+ return _mm;
+ }
+ Node* base_memory() const {
+ assert(_mm_base == _mm->base_memory(), "no update to base memory, please");
+ return _mm_base;
+ }
+ const MergeMemNode* all_memory2() const {
+ assert(_mm2 != NULL, "");
+ return _mm2;
+ }
+ bool at_base_memory() const {
+ return _idx == Compile::AliasIdxBot;
+ }
+ int alias_idx() const {
+ assert(_mem, "must call next 1st");
+ return _idx;
+ }
+
+ const TypePtr* adr_type() const {
+ return Compile::current()->get_adr_type(alias_idx());
+ }
+
+ const TypePtr* adr_type(Compile* C) const {
+ return C->get_adr_type(alias_idx());
+ }
+ bool is_empty() const {
+ assert(_mem, "must call next 1st");
+ assert(_mem->is_top() == (_mem==_mm->empty_memory()), "correct sentinel");
+ return _mem->is_top();
+ }
+ bool is_empty2() const {
+ assert(_mem2, "must call next 1st");
+ assert(_mem2->is_top() == (_mem2==_mm2->empty_memory()), "correct sentinel");
+ return _mem2->is_top();
+ }
+ Node* memory() const {
+ assert(!is_empty(), "must not be empty");
+ assert_synch();
+ return _mem;
+ }
+ // get the current memory, regardless of empty or non-empty status
+ Node* force_memory() const {
+ assert(!is_empty() || !at_base_memory(), "");
+ // Use _mm_base to defend against updates to _mem->base_memory().
+ Node *mem = _mem->is_top() ? _mm_base : _mem;
+ assert(mem == check_memory(), "");
+ return mem;
+ }
+ Node* memory2() const {
+ assert(_mem2 == check_memory2(), "");
+ return _mem2;
+ }
+ void set_memory(Node* mem) {
+ if (at_base_memory()) {
+ // Note that this does not change the invariant _mm_base.
+ _mm->set_base_memory(mem);
+ } else {
+ _mm->set_memory_at(_idx, mem);
+ }
+ _mem = mem;
+ assert_synch();
+ }
+
+ // Recover from a side effect to the MergeMemNode.
+ void set_memory() {
+ _mem = _mm->in(_idx);
+ }
+
+ bool next() { return next(false); }
+ bool next2() { return next(true); }
+
+ bool next_non_empty() { return next_non_empty(false); }
+ bool next_non_empty2() { return next_non_empty(true); }
+ // next_non_empty2 can yield states where is_empty() is true
+
+ private:
+ // find the next item, which might be empty
+ bool next(bool have_mm2) {
+ assert((_mm2 != NULL) == have_mm2, "use other next");
+ assert_synch();
+ if (++_idx < _cnt) {
+ // Note: This iterator allows _mm to be non-sparse.
+ // It behaves the same whether _mem is top or base_memory.
+ _mem = _mm->in(_idx);
+ if (have_mm2)
+ _mem2 = _mm2->in((_idx < _cnt2) ? _idx : Compile::AliasIdxTop);
+ return true;
+ }
+ return false;
+ }
+
+ // find the next non-empty item
+ bool next_non_empty(bool have_mm2) {
+ while (next(have_mm2)) {
+ if (!is_empty()) {
+ // make sure _mem2 is filled in sensibly
+ if (have_mm2 && _mem2->is_top()) _mem2 = _mm2->base_memory();
+ return true;
+ } else if (have_mm2 && !is_empty2()) {
+ return true; // is_empty() == true
+ }
+ }
+ return false;
+ }
+};
+
+//------------------------------Prefetch---------------------------------------
+
+// Non-faulting prefetch load. Prefetch for many reads.
+class PrefetchReadNode : public Node {
+public:
+ PrefetchReadNode(Node *abio, Node *adr) : Node(0,abio,adr) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return idx==2; }
+ virtual const Type *bottom_type() const { return Type::ABIO; }
+};
+
+// Non-faulting prefetch load. Prefetch for many reads & many writes.
+class PrefetchWriteNode : public Node {
+public:
+ PrefetchWriteNode(Node *abio, Node *adr) : Node(0,abio,adr) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return idx==2; }
+ virtual const Type *bottom_type() const { return Type::ABIO; }
+};
diff --git a/src/share/vm/opto/mulnode.cpp b/src/share/vm/opto/mulnode.cpp
new file mode 100644
index 000000000..146c432fe
--- /dev/null
+++ b/src/share/vm/opto/mulnode.cpp
@@ -0,0 +1,1310 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+#include "incls/_precompiled.incl"
+#include "incls/_mulnode.cpp.incl"
+
+
+//=============================================================================
+//------------------------------hash-------------------------------------------
+// Hash function over MulNodes. Needs to be commutative; i.e., I swap
+// (commute) inputs to MulNodes willy-nilly so the hash function must return
+// the same value in the presence of edge swapping.
+uint MulNode::hash() const {
+ return (uintptr_t)in(1) + (uintptr_t)in(2) + Opcode();
+}
+
+//------------------------------Identity---------------------------------------
+// Multiplying by one preserves the other argument
+Node *MulNode::Identity( PhaseTransform *phase ) {
+ register const Type *one = mul_id(); // The multiplicative identity
+ if( phase->type( in(1) )->higher_equal( one ) ) return in(2);
+ if( phase->type( in(2) )->higher_equal( one ) ) return in(1);
+
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+// We also canonicalize the Node, moving constants to the right input,
+// and flattening expressions (so that 1+x+2 becomes x+3).
+Node *MulNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ Node *progress = NULL; // Progress flag
+ // We are OK if right is a constant, or right is a load and
+ // left is a non-constant.
+ if( !(t2->singleton() ||
+ (in(2)->is_Load() && !(t1->singleton() || in(1)->is_Load())) ) ) {
+ if( t1->singleton() || // Left input is a constant?
+ // Otherwise, sort inputs (commutativity) to help value numbering.
+ (in(1)->_idx > in(2)->_idx) ) {
+ swap_edges(1, 2);
+ const Type *t = t1;
+ t1 = t2;
+ t2 = t;
+ progress = this; // Made progress
+ }
+ }
+
+ // If the right input is a constant, and the left input is a product of a
+ // constant, flatten the expression tree.
+ uint op = Opcode();
+ if( t2->singleton() && // Right input is a constant?
+ op != Op_MulF && // Float & double cannot reassociate
+ op != Op_MulD ) {
+ if( t2 == Type::TOP ) return NULL;
+ Node *mul1 = in(1);
+#ifdef ASSERT
+ // Check for dead loop
+ int op1 = mul1->Opcode();
+ if( phase->eqv( mul1, this ) || phase->eqv( in(2), this ) ||
+ ( op1 == mul_opcode() || op1 == add_opcode() ) &&
+ ( phase->eqv( mul1->in(1), this ) || phase->eqv( mul1->in(2), this ) ||
+ phase->eqv( mul1->in(1), mul1 ) || phase->eqv( mul1->in(2), mul1 ) ) )
+ assert(false, "dead loop in MulNode::Ideal");
+#endif
+
+ if( mul1->Opcode() == mul_opcode() ) { // Left input is a multiply?
+ // Mul of a constant?
+ const Type *t12 = phase->type( mul1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP) { // Left input is a mul of a constant?
+ // Compute new constant; check for overflow
+ const Type *tcon01 = mul1->as_Mul()->mul_ring(t2,t12);
+ if( tcon01->singleton() ) {
+ // The Mul of the flattened expression
+ set_req(1, mul1->in(1));
+ set_req(2, phase->makecon( tcon01 ));
+ t2 = tcon01;
+ progress = this; // Made progress
+ }
+ }
+ }
+ // If the right input is a constant, and the left input is an add of a
+ // constant, flatten the tree: (X+con1)*con0 ==> X*con0 + con1*con0
+ const Node *add1 = in(1);
+ if( add1->Opcode() == add_opcode() ) { // Left input is an add?
+ // Add of a constant?
+ const Type *t12 = phase->type( add1->in(2) );
+ if( t12->singleton() && t12 != Type::TOP ) { // Left input is an add of a constant?
+ assert( add1->in(1) != add1, "dead loop in MulNode::Ideal" );
+ // Compute new constant; check for overflow
+ const Type *tcon01 = mul_ring(t2,t12);
+ if( tcon01->singleton() ) {
+
+ // Convert (X+con1)*con0 into X*con0
+ Node *mul = clone(); // mul = ()*con0
+ mul->set_req(1,add1->in(1)); // mul = X*con0
+ mul = phase->transform(mul);
+
+ Node *add2 = add1->clone();
+ add2->set_req(1, mul); // X*con0 + con0*con1
+ add2->set_req(2, phase->makecon(tcon01) );
+ progress = add2;
+ }
+ }
+ } // End of is left input an add
+ } // End of is right input a Mul
+
+ return progress;
+}
+
+//------------------------------Value-----------------------------------------
+const Type *MulNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Either input is ZERO ==> the result is ZERO.
+ // Not valid for floats or doubles since +0.0 * -0.0 --> -0.0 (and 0 * inf --> NaN)
+ int op = Opcode();
+ if( op == Op_MulI || op == Op_AndI || op == Op_MulL || op == Op_AndL ) {
+ const Type *zero = add_id(); // The multiplicative zero
+ if( t1->higher_equal( zero ) ) return zero;
+ if( t2->higher_equal( zero ) ) return zero;
+ }
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
+ return bottom_type();
+
+ return mul_ring(t1,t2); // Local flavor of type multiplication
+}
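The float/double restriction in Value() above can be checked directly: propagating a zero constant through a floating-point multiply would get the sign of zero wrong and mishandle infinities. A standalone check (illustrative only, not part of this patch):

#include <cassert>
#include <cmath>
#include <limits>

int main() {
  // "Zero times anything is zero" is unsound for IEEE-754 floats:
  assert(std::signbit(0.0f * -0.0f));   // +0.0 * -0.0 is -0.0, not +0.0
  assert(std::signbit(0.0f * -5.0f));   // +0.0 * negative is -0.0
  assert(std::isnan(0.0f * std::numeric_limits<float>::infinity()));  // 0 * inf is NaN
  return 0;
}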
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Check for power-of-2 multiply, then try the regular MulNode::Ideal
+Node *MulINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Swap constant to right
+ jint con;
+ if ((con = in(1)->find_int_con(0)) != 0) {
+ swap_edges(1, 2);
+ // Finish rest of method to use info in 'con'
+ } else if ((con = in(2)->find_int_con(0)) == 0) {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+
+ // Now we have a constant Node on the right and the constant in con
+ if( con == 0 ) return NULL; // By zero is handled by Value call
+ if( con == 1 ) return NULL; // By one is handled by Identity call
+
+ // Check for negative constant; if so negate the final result
+ bool sign_flip = false;
+ if( con < 0 ) {
+ con = -con;
+ sign_flip = true;
+ }
+
+ // Get low bit; check for being the only bit
+ Node *res = NULL;
+ jint bit1 = con & -con; // Extract low bit
+ if( bit1 == con ) { // Found a power of 2?
+ res = new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit1)) );
+ } else {
+
+ // Check for constant with 2 bits set
+ jint bit2 = con-bit1;
+ bit2 = bit2 & -bit2; // Extract 2nd bit
+ if( bit2 + bit1 == con ) { // Found all bits in con?
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit1)) ) );
+ Node *n2 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(bit2)) ) );
+ res = new (phase->C, 3) AddINode( n2, n1 );
+
+ } else if (is_power_of_2(con+1)) {
+ // Sleazy: power-of-2 minus 1. Next time be generic.
+ jint temp = (jint) (con + 1);
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftINode( in(1), phase->intcon(log2_intptr(temp)) ) );
+ res = new (phase->C, 3) SubINode( n1, in(1) );
+ } else {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+ }
+
+ if( sign_flip ) { // Need to negate result?
+ res = phase->transform(res);// Transform, before making the zero con
+ res = new (phase->C, 3) SubINode(phase->intcon(0),res);
+ }
+
+ return res; // Return final result
+}
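The three shapes MulINode::Ideal recognizes (a single bit, two bits, and 2^k-1) are all plain identities in wrapping 32-bit arithmetic. A standalone sketch, emulating Java int semantics with uint32_t (illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>

// Wrapping 32-bit multiply, matching Java int semantics.
// Assumes two's-complement int32_t, as on HotSpot's target platforms.
static int32_t mulw(int32_t x, int32_t c) {
  return (int32_t)((uint32_t)x * (uint32_t)c);
}

int main() {
  int32_t x = 123456789;
  // Power of two: x*8 == x<<3
  assert(mulw(x, 8)  == (int32_t)((uint32_t)x << 3));
  // Two bits set: x*12 == (x<<3) + (x<<2)
  assert(mulw(x, 12) == (int32_t)(((uint32_t)x << 3) + ((uint32_t)x << 2)));
  // 2^k - 1: x*7 == (x<<3) - x
  assert(mulw(x, 7)  == (int32_t)(((uint32_t)x << 3) - (uint32_t)x));
  // Negative constant: multiply by |c|, then negate, as the sign_flip path does.
  assert(mulw(x, -8) == (int32_t)(0u - ((uint32_t)x << 3)));
  return 0;
}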
+
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two integer ranges into this node.
+const Type *MulINode::mul_ring(const Type *t0, const Type *t1) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+
+ // Fetch endpoints of all ranges
+ int32 lo0 = r0->_lo;
+ double a = (double)lo0;
+ int32 hi0 = r0->_hi;
+ double b = (double)hi0;
+ int32 lo1 = r1->_lo;
+ double c = (double)lo1;
+ int32 hi1 = r1->_hi;
+ double d = (double)hi1;
+
+ // Compute all endpoints & check for overflow
+ int32 A = lo0*lo1;
+ if( (double)A != a*c ) return TypeInt::INT; // Overflow?
+ int32 B = lo0*hi1;
+ if( (double)B != a*d ) return TypeInt::INT; // Overflow?
+ int32 C = hi0*lo1;
+ if( (double)C != b*c ) return TypeInt::INT; // Overflow?
+ int32 D = hi0*hi1;
+ if( (double)D != b*d ) return TypeInt::INT; // Overflow?
+
+ if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
+ else { lo0 = B; hi0 = A; }
+ if( C < D ) {
+ if( C < lo0 ) lo0 = C;
+ if( D > hi0 ) hi0 = D;
+ } else {
+ if( D < lo0 ) lo0 = D;
+ if( C > hi0 ) hi0 = C;
+ }
+ return TypeInt::make(lo0, hi0, MAX2(r0->_widen,r1->_widen));
+}
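mul_ring above guards against 32-bit overflow by recomputing each endpoint product in double precision and comparing it with the truncated int32 product. An equivalent standalone check, done with int64_t instead of the double comparison (a sketch only, not the method used above):

#include <cassert>
#include <cstdint>
#include <limits>

// True iff the exact product of x and y is representable as an int32,
// i.e. the endpoint product would not force widening to TypeInt::INT.
static bool product_fits_int32(int32_t x, int32_t y) {
  int64_t p = (int64_t)x * (int64_t)y;   // exact in 64 bits
  return p >= std::numeric_limits<int32_t>::min() &&
         p <= std::numeric_limits<int32_t>::max();
}

int main() {
  assert( product_fits_int32(46341, 46340));   // 2147441940 still fits
  assert(!product_fits_int32(46341, 46341));   // 2147488281 overflows
  assert( product_fits_int32(-2, 1 << 30));    // -2^31 is exactly INT32_MIN
  assert(!product_fits_int32( 2, 1 << 30));    // +2^31 does not fit
  return 0;
}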
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+// Check for power-of-2 multiply, then try the regular MulNode::Ideal
+Node *MulLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Swap constant to right
+ jlong con;
+ if ((con = in(1)->find_long_con(0)) != 0) {
+ swap_edges(1, 2);
+ // Finish rest of method to use info in 'con'
+ } else if ((con = in(2)->find_long_con(0)) == 0) {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+
+ // Now we have a constant Node on the right and the constant in con
+ if( con == CONST64(0) ) return NULL; // By zero is handled by Value call
+ if( con == CONST64(1) ) return NULL; // By one is handled by Identity call
+
+ // Check for negative constant; if so negate the final result
+ bool sign_flip = false;
+ if( con < 0 ) {
+ con = -con;
+ sign_flip = true;
+ }
+
+ // Get low bit; check for being the only bit
+ Node *res = NULL;
+ jlong bit1 = con & -con; // Extract low bit
+ if( bit1 == con ) { // Found a power of 2?
+ res = new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit1)) );
+ } else {
+
+ // Check for constant with 2 bits set
+ jlong bit2 = con-bit1;
+ bit2 = bit2 & -bit2; // Extract 2nd bit
+ if( bit2 + bit1 == con ) { // Found all bits in con?
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit1)) ) );
+ Node *n2 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(bit2)) ) );
+ res = new (phase->C, 3) AddLNode( n2, n1 );
+
+ } else if (is_power_of_2_long(con+1)) {
+ // Sleazy: power-of-2 minus 1. Next time be generic.
+ jlong temp = (jlong) (con + 1);
+ Node *n1 = phase->transform( new (phase->C, 3) LShiftLNode( in(1), phase->intcon(log2_long(temp)) ) );
+ res = new (phase->C, 3) SubLNode( n1, in(1) );
+ } else {
+ return MulNode::Ideal(phase, can_reshape);
+ }
+ }
+
+ if( sign_flip ) { // Need to negate result?
+ res = phase->transform(res);// Transform, before making the zero con
+ res = new (phase->C, 3) SubLNode(phase->longcon(0),res);
+ }
+
+ return res; // Return final result
+}
+
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two long integer ranges into this node.
+const Type *MulLNode::mul_ring(const Type *t0, const Type *t1) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+
+ // Fetch endpoints of all ranges
+ jlong lo0 = r0->_lo;
+ double a = (double)lo0;
+ jlong hi0 = r0->_hi;
+ double b = (double)hi0;
+ jlong lo1 = r1->_lo;
+ double c = (double)lo1;
+ jlong hi1 = r1->_hi;
+ double d = (double)hi1;
+
+ // Compute all endpoints & check for overflow
+ jlong A = lo0*lo1;
+ if( (double)A != a*c ) return TypeLong::LONG; // Overflow?
+ jlong B = lo0*hi1;
+ if( (double)B != a*d ) return TypeLong::LONG; // Overflow?
+ jlong C = hi0*lo1;
+ if( (double)C != b*c ) return TypeLong::LONG; // Overflow?
+ jlong D = hi0*hi1;
+ if( (double)D != b*d ) return TypeLong::LONG; // Overflow?
+
+ if( A < B ) { lo0 = A; hi0 = B; } // Sort range endpoints
+ else { lo0 = B; hi0 = A; }
+ if( C < D ) {
+ if( C < lo0 ) lo0 = C;
+ if( D > hi0 ) hi0 = D;
+ } else {
+ if( D < lo0 ) lo0 = D;
+ if( C > hi0 ) hi0 = C;
+ }
+ return TypeLong::make(lo0, hi0, MAX2(r0->_widen,r1->_widen));
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two float ranges into this node.
+const Type *MulFNode::mul_ring(const Type *t0, const Type *t1) const {
+ if( t0 == Type::FLOAT || t1 == Type::FLOAT ) return Type::FLOAT;
+ return TypeF::make( t0->getf() * t1->getf() );
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Compute the product type of two double ranges into this node.
+const Type *MulDNode::mul_ring(const Type *t0, const Type *t1) const {
+ if( t0 == Type::DOUBLE || t1 == Type::DOUBLE ) return Type::DOUBLE;
+ // We must be multiplying 2 double constants.
+ return TypeD::make( t0->getd() * t1->getd() );
+}
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Supplied function returns the product of the inputs IN THE CURRENT RING.
+// For the logical operations the ring's MUL is really a logical AND function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AndINode::mul_ring( const Type *t0, const Type *t1 ) const {
+ const TypeInt *r0 = t0->is_int(); // Handy access
+ const TypeInt *r1 = t1->is_int();
+ int widen = MAX2(r0->_widen,r1->_widen);
+
+ // If either input is a constant, might be able to trim cases
+ if( !r0->is_con() && !r1->is_con() )
+ return TypeInt::INT; // No constants to be had
+
+ // Both constants? Return bits
+ if( r0->is_con() && r1->is_con() )
+ return TypeInt::make( r0->get_con() & r1->get_con() );
+
+ if( r0->is_con() && r0->get_con() > 0 )
+ return TypeInt::make(0, r0->get_con(), widen);
+
+ if( r1->is_con() && r1->get_con() > 0 )
+ return TypeInt::make(0, r1->get_con(), widen);
+
+ if( r0 == TypeInt::BOOL || r1 == TypeInt::BOOL ) {
+ return TypeInt::BOOL;
+ }
+
+ return TypeInt::INT; // No constants to be had
+}
+
+//------------------------------Identity---------------------------------------
+// Masking off the high bits of an unsigned load is not required
+Node *AndINode::Identity( PhaseTransform *phase ) {
+
+ // x & x => x
+ if (phase->eqv(in(1), in(2))) return in(1);
+
+ Node *load = in(1);
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( t2 && t2->is_con() ) {
+ int con = t2->get_con();
+ // Masking off high bits which are always zero is useless.
+ const TypeInt* t1 = phase->type( in(1) )->isa_int();
+ if (t1 != NULL && t1->_lo >= 0) {
+ jint t1_support = ((jint)1 << (1 + log2_intptr(t1->_hi))) - 1;
+ if ((t1_support & con) == t1_support)
+ return load;
+ }
+ uint lop = load->Opcode();
+ if( lop == Op_LoadC &&
+ con == 0x0000FFFF ) // Already zero-extended
+ return load;
+ // Masking off the high bits of an unsigned-shift-right is not
+ // needed either.
+ if( lop == Op_URShiftI ) {
+ const TypeInt *t12 = phase->type( load->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) {
+ int shift_con = t12->get_con();
+ int mask = max_juint >> shift_con;
+ if( (mask&con) == mask ) // If AND is useless, skip it
+ return load;
+ }
+ }
+ }
+ return MulNode::Identity(phase);
+}
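Both no-op cases above come down to one fact: AND with a mask that covers every bit the operand can possibly have is the identity. A standalone sketch of the two cases (illustrative only, not part of this patch):

#include <cassert>
#include <cstdint>

int main() {
  // Case 1: the operand is known non-negative with upper bound hi; a mask that
  // covers all bits up to the highest bit of hi leaves it unchanged.
  int32_t hi = 0x1234;                      // assume the type says 0 <= x <= hi
  int32_t support = (1 << (1 + 12)) - 1;    // 12 == floor(log2(0x1234)), support == 0x1FFF
  for (int32_t x = 0; x <= hi; ++x)
    assert((x & support) == x);

  // Case 2: an unsigned shift right by s leaves at most (32-s) low bits set,
  // so AND with (0xFFFFFFFF >>> s) is useless.
  uint32_t v = 0xDEADBEEFu;
  int s = 13;
  uint32_t shifted = v >> s;
  uint32_t mask = 0xFFFFFFFFu >> s;
  assert((shifted & mask) == shifted);
  return 0;
}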
+
+//------------------------------Ideal------------------------------------------
+Node *AndINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Special case constant AND mask
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
+ const int mask = t2->get_con();
+ Node *load = in(1);
+ uint lop = load->Opcode();
+
+ // Masking bits off of a Character? Hi bits are already zero.
+ if( lop == Op_LoadC &&
+ (mask & 0xFFFF0000) ) // Can we make a smaller mask?
+ return new (phase->C, 3) AndINode(load,phase->intcon(mask&0xFFFF));
+
+ // Masking bits off of a Short? Loading a Character does some masking
+ if( lop == Op_LoadS &&
+ (mask & 0xFFFF0000) == 0 ) {
+ Node *ldc = new (phase->C, 3) LoadCNode(load->in(MemNode::Control),
+ load->in(MemNode::Memory),
+ load->in(MemNode::Address),
+ load->adr_type());
+ ldc = phase->transform(ldc);
+ return new (phase->C, 3) AndINode(ldc,phase->intcon(mask&0xFFFF));
+ }
+
+ // Masking sign bits off of a Byte? Let the matcher use an unsigned load
+ if( lop == Op_LoadB &&
+ (!in(0) && load->in(0)) &&
+ (mask == 0x000000FF) ) {
+ // Associate this node with the LoadB, so the matcher can see them together.
+ // If we don't do this, it is common for the LoadB to have one control
+ // edge, and the store or call containing this AndI to have a different
+ // control edge. This will cause Label_Root to group the AndI with
+ // the encoding store or call, so the matcher has no chance to match
+ // this AndI together with the LoadB. Setting the control edge here
+ // prevents Label_Root from grouping the AndI with the store or call,
+ // if it has a control edge that is inconsistent with the LoadB.
+ set_req(0, load->in(0));
+ return this;
+ }
+
+ // Masking off sign bits? Don't make them!
+ if( lop == Op_RShiftI ) {
+ const TypeInt *t12 = phase->type(load->in(2))->isa_int();
+ if( t12 && t12->is_con() ) { // Shift is by a constant
+ int shift = t12->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ const int sign_bits_mask = ~right_n_bits(BitsPerJavaInteger - shift);
+ // If the AND'ing of the 2 masks has no bits, then only original shifted
+ // bits survive. NO sign-extension bits survive the maskings.
+ if( (sign_bits_mask & mask) == 0 ) {
+ // Use zero-fill shift instead
+ Node *zshift = phase->transform(new (phase->C, 3) URShiftINode(load->in(1),load->in(2)));
+ return new (phase->C, 3) AndINode( zshift, in(2) );
+ }
+ }
+ }
+
+ // Check for 'negate/and-1', a pattern emitted when someone asks for
+ // 'mod 2'. Negate leaves the low order bit unchanged (think: complement
+ // plus 1) and the mask is of the low order bit. Skip the negate.
+ if( lop == Op_SubI && mask == 1 && load->in(1) &&
+ phase->type(load->in(1)) == TypeInt::ZERO )
+ return new (phase->C, 3) AndINode( load->in(2), in(2) );
+
+ return MulNode::Ideal(phase, can_reshape);
+}
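Two of the rewrites above rest on small bit-level identities: a signed shift can be replaced by an unsigned one when the mask discards every sign-extension bit, and negation preserves the low-order bit. A standalone sketch (illustrative only; assumes two's-complement int32_t with arithmetic >> for negative values, as on HotSpot's targets):

#include <cassert>
#include <cstdint>

static int32_t sshr(int32_t x, int s) { return x >> s; }                        // RShiftI
static int32_t ushr(int32_t x, int s) { return (int32_t)((uint32_t)x >> s); }   // URShiftI

int main() {
  int32_t x = -123456;
  int s = 5;

  // (x >> s) & mask == (x >>> s) & mask whenever mask contains none of the
  // sign-extension bits (here the mask keeps only the low 16 bits).
  int32_t mask = 0xFFFF;
  assert((sshr(x, s) & mask) == (ushr(x, s) & mask));

  // 'mod 2' idiom: (0 - x) & 1 == x & 1, so the negate can be skipped.
  assert(((0 - x) & 1) == (x & 1));
  assert(((0 - (x + 1)) & 1) == ((x + 1) & 1));
  return 0;
}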
+
+//=============================================================================
+//------------------------------mul_ring---------------------------------------
+// Supplied function returns the product of the inputs IN THE CURRENT RING.
+// For the logical operations the ring's MUL is really a logical AND function.
+// This also type-checks the inputs for sanity. Guaranteed never to
+// be passed a TOP or BOTTOM type, these are filtered out by pre-check.
+const Type *AndLNode::mul_ring( const Type *t0, const Type *t1 ) const {
+ const TypeLong *r0 = t0->is_long(); // Handy access
+ const TypeLong *r1 = t1->is_long();
+ int widen = MAX2(r0->_widen,r1->_widen);
+
+ // If either input is a constant, might be able to trim cases
+ if( !r0->is_con() && !r1->is_con() )
+ return TypeLong::LONG; // No constants to be had
+
+ // Both constants? Return bits
+ if( r0->is_con() && r1->is_con() )
+ return TypeLong::make( r0->get_con() & r1->get_con() );
+
+ if( r0->is_con() && r0->get_con() > 0 )
+ return TypeLong::make(CONST64(0), r0->get_con(), widen);
+
+ if( r1->is_con() && r1->get_con() > 0 )
+ return TypeLong::make(CONST64(0), r1->get_con(), widen);
+
+ return TypeLong::LONG; // No constants to be had
+}
+
+//------------------------------Identity---------------------------------------
+// Masking off the high bits of an unsigned load is not required
+Node *AndLNode::Identity( PhaseTransform *phase ) {
+
+ // x & x => x
+ if (phase->eqv(in(1), in(2))) return in(1);
+
+ Node *usr = in(1);
+ const TypeLong *t2 = phase->type( in(2) )->isa_long();
+ if( t2 && t2->is_con() ) {
+ jlong con = t2->get_con();
+ // Masking off high bits which are always zero is useless.
+ const TypeLong* t1 = phase->type( in(1) )->isa_long();
+ if (t1 != NULL && t1->_lo >= 0) {
+ jlong t1_support = ((jlong)1 << (1 + log2_long(t1->_hi))) - 1;
+ if ((t1_support & con) == t1_support)
+ return usr;
+ }
+ uint lop = usr->Opcode();
+ // Masking off the high bits of an unsigned-shift-right is not
+ // needed either.
+ if( lop == Op_URShiftL ) {
+ const TypeInt *t12 = phase->type( usr->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) {
+ int shift_con = t12->get_con();
+ jlong mask = max_julong >> shift_con;
+ if( (mask&con) == mask ) // If AND is useless, skip it
+ return usr;
+ }
+ }
+ }
+ return MulNode::Identity(phase);
+}
+
+//------------------------------Ideal------------------------------------------
+Node *AndLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Special case constant AND mask
+ const TypeLong *t2 = phase->type( in(2) )->isa_long();
+ if( !t2 || !t2->is_con() ) return MulNode::Ideal(phase, can_reshape);
+ const jlong mask = t2->get_con();
+
+ Node *rsh = in(1);
+ uint rop = rsh->Opcode();
+
+ // Masking off sign bits? Don't make them!
+ if( rop == Op_RShiftL ) {
+ const TypeInt *t12 = phase->type(rsh->in(2))->isa_int();
+ if( t12 && t12->is_con() ) { // Shift is by a constant
+ int shift = t12->get_con();
+ shift &= (BitsPerJavaInteger*2)-1; // semantics of Java shifts
+ const jlong sign_bits_mask = ~(((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - shift)) -1);
+ // If the AND'ing of the 2 masks has no bits, then only original shifted
+ // bits survive. NO sign-extension bits survive the maskings.
+ if( (sign_bits_mask & mask) == 0 ) {
+ // Use zero-fill shift instead
+ Node *zshift = phase->transform(new (phase->C, 3) URShiftLNode(rsh->in(1),rsh->in(2)));
+ return new (phase->C, 3) AndLNode( zshift, in(2) );
+ }
+ }
+ }
+
+ return MulNode::Ideal(phase, can_reshape);
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *LShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerInt - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the right input is a constant, and the left input is an add of a
+// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
+Node *LShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL; // Right input is dead
+ const TypeInt *t2 = t->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerInt - 1 ); // masked shift count
+
+ if ( con == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Left input is an add of a constant?
+ Node *add1 = in(1);
+ int add1_op = add1->Opcode();
+ if( add1_op == Op_AddI ) { // Left input is an add?
+ assert( add1 != add1->in(1), "dead loop in LShiftINode::Ideal" );
+ const TypeInt *t12 = phase->type(add1->in(2))->isa_int();
+ if( t12 && t12->is_con() ){ // Left input is an add of a con?
+ // Transform is legal, but check for profit. Avoid breaking 'i2s'
+ // and 'i2b' patterns which typically fold into 'StoreC/StoreB'.
+ if( con < 16 ) {
+ // Compute X << con0
+ Node *lsh = phase->transform( new (phase->C, 3) LShiftINode( add1->in(1), in(2) ) );
+ // Compute X<<con0 + (con1<<con0)
+ return new (phase->C, 3) AddINode( lsh, phase->intcon(t12->get_con() << con));
+ }
+ }
+ }
+
+ // Check for "(x>>c0)<<c0" which just masks off low bits
+ if( (add1_op == Op_RShiftI || add1_op == Op_URShiftI ) &&
+ add1->in(2) == in(2) )
+ // Convert to "(x & -(1<<c0))"
+ return new (phase->C, 3) AndINode(add1->in(1),phase->intcon( -(1<<con)));
+
+ // Check for "((x>>c0) & Y)<<c0" which just masks off more low bits
+ if( add1_op == Op_AndI ) {
+ Node *add2 = add1->in(1);
+ int add2_op = add2->Opcode();
+ if( (add2_op == Op_RShiftI || add2_op == Op_URShiftI ) &&
+ add2->in(2) == in(2) ) {
+ // Convert to "(x & (Y<<c0))"
+ Node *y_sh = phase->transform( new (phase->C, 3) LShiftINode( add1->in(2), in(2) ) );
+ return new (phase->C, 3) AndINode( add2->in(1), y_sh );
+ }
+ }
+
+ // Check for ((x & ((1<<(32-c0))-1)) << c0) which ANDs off high bits
+ // before shifting them away.
+ const jint bits_mask = right_n_bits(BitsPerJavaInteger-con);
+ if( add1_op == Op_AndI &&
+ phase->type(add1->in(2)) == TypeInt::make( bits_mask ) )
+ return new (phase->C, 3) LShiftINode( add1->in(1), in(2) );
+
+ return NULL;
+}
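Each of the left-shift rewrites above is an identity in wrapping 32-bit arithmetic. A standalone sketch of the three main cases (illustrative only, not part of this patch; Java int semantics emulated with uint32_t):

#include <cassert>
#include <cstdint>

static int32_t shl (int32_t x, int c) { return (int32_t)((uint32_t)x << c); }   // LShiftI
static int32_t ushr(int32_t x, int c) { return (int32_t)((uint32_t)x >> c); }   // URShiftI
static int32_t addw(int32_t a, int32_t b) { return (int32_t)((uint32_t)a + (uint32_t)b); }

int main() {
  int32_t x = 0x12345678, y = 0x0F0F0F0F;
  int c0 = 7, con1 = 1000;

  // (X + con1) << c0  ==>  (X << c0) + (con1 << c0)
  assert(shl(addw(x, con1), c0) == addw(shl(x, c0), shl(con1, c0)));

  // (x >>> c0) << c0  ==>  x & -(1 << c0), i.e. just mask off the low bits
  assert(shl(ushr(x, c0), c0) == (x & -(1 << c0)));

  // ((x >>> c0) & y) << c0  ==>  x & (y << c0)
  assert(shl(ushr(x, c0) & y, c0) == (x & shl(y, c0)));
  return 0;
}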
+
+//------------------------------Value------------------------------------------
+// A LShiftINode shifts its input1 left by the input2 amount.
+const Type *LShiftINode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if( (t1 == TypeInt::INT) || (t2 == TypeInt::INT) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (!r2->is_con())
+ return TypeInt::INT;
+
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+
+ // If the shift is a constant, shift the bounds of the type,
+ // unless this could lead to an overflow.
+ if (!r1->is_con()) {
+ jint lo = r1->_lo, hi = r1->_hi;
+ if (((lo << shift) >> shift) == lo &&
+ ((hi << shift) >> shift) == hi) {
+ // No overflow. The range shifts up cleanly.
+ return TypeInt::make((jint)lo << (jint)shift,
+ (jint)hi << (jint)shift,
+ MAX2(r1->_widen,r2->_widen));
+ }
+ return TypeInt::INT;
+ }
+
+ return TypeInt::make( (jint)r1->get_con() << (jint)shift );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *LShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+// If the right input is a constant, and the left input is an add of a
+// constant, flatten the tree: (X+con1)<<con0 ==> X<<con0 + con1<<con0
+Node *LShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t = phase->type( in(2) );
+ if( t == Type::TOP ) return NULL; // Right input is dead
+ const TypeInt *t2 = t->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerLong - 1 ); // masked shift count
+
+ if ( con == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Left input is an add of a constant?
+ Node *add1 = in(1);
+ int add1_op = add1->Opcode();
+ if( add1_op == Op_AddL ) { // Left input is an add?
+ // Avoid dead data cycles from dead loops
+ assert( add1 != add1->in(1), "dead loop in LShiftLNode::Ideal" );
+ const TypeLong *t12 = phase->type(add1->in(2))->isa_long();
+ if( t12 && t12->is_con() ){ // Left input is an add of a con?
+ // Compute X << con0
+ Node *lsh = phase->transform( new (phase->C, 3) LShiftLNode( add1->in(1), in(2) ) );
+ // Compute X<<con0 + (con1<<con0)
+ return new (phase->C, 3) AddLNode( lsh, phase->longcon(t12->get_con() << con));
+ }
+ }
+
+ // Check for "(x>>c0)<<c0" which just masks off low bits
+ if( (add1_op == Op_RShiftL || add1_op == Op_URShiftL ) &&
+ add1->in(2) == in(2) )
+ // Convert to "(x & -(1<<c0))"
+ return new (phase->C, 3) AndLNode(add1->in(1),phase->longcon( -(CONST64(1)<<con)));
+
+ // Check for "((x>>c0) & Y)<<c0" which just masks off more low bits
+ if( add1_op == Op_AndL ) {
+ Node *add2 = add1->in(1);
+ int add2_op = add2->Opcode();
+ if( (add2_op == Op_RShiftL || add2_op == Op_URShiftL ) &&
+ add2->in(2) == in(2) ) {
+ // Convert to "(x & (Y<<c0))"
+ Node *y_sh = phase->transform( new (phase->C, 3) LShiftLNode( add1->in(2), in(2) ) );
+ return new (phase->C, 3) AndLNode( add2->in(1), y_sh );
+ }
+ }
+
+ // Check for ((x & ((CONST64(1)<<(64-c0))-1)) << c0) which ANDs off high bits
+ // before shifting them away.
+ const jlong bits_mask = ((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) - CONST64(1);
+ if( add1_op == Op_AndL &&
+ phase->type(add1->in(2)) == TypeLong::make( bits_mask ) )
+ return new (phase->C, 3) LShiftLNode( add1->in(1), in(2) );
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// A LShiftLNode shifts its input1 left by the input2 amount.
+const Type *LShiftLNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if( (t1 == TypeLong::LONG) || (t2 == TypeInt::INT) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (!r2->is_con())
+ return TypeLong::LONG;
+
+ uint shift = r2->get_con();
+ shift &= (BitsPerJavaInteger*2)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+
+ // If the shift is a constant, shift the bounds of the type,
+ // unless this could lead to an overflow.
+ if (!r1->is_con()) {
+ jlong lo = r1->_lo, hi = r1->_hi;
+ if (((lo << shift) >> shift) == lo &&
+ ((hi << shift) >> shift) == hi) {
+ // No overflow. The range shifts up cleanly.
+ return TypeLong::make((jlong)lo << (jint)shift,
+ (jlong)hi << (jint)shift,
+ MAX2(r1->_widen,r2->_widen));
+ }
+ return TypeLong::LONG;
+ }
+
+ return TypeLong::make( (jlong)r1->get_con() << (jint)shift );
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *RShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *t2 = phase->type(in(2))->isa_int();
+ if( !t2 ) return this;
+ if ( t2->is_con() && ( t2->get_con() & ( BitsPerInt - 1 ) ) == 0 )
+ return in(1);
+
+ // Check for useless sign-masking
+ if( in(1)->Opcode() == Op_LShiftI &&
+ in(1)->req() == 3 &&
+ in(1)->in(2) == in(2) &&
+ t2->is_con() ) {
+ uint shift = t2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Compute masks for which this shifting doesn't change
+ int lo = (-1 << (BitsPerJavaInteger - shift-1)); // FFFF8000
+ int hi = ~lo; // 00007FFF
+ const TypeInt *t11 = phase->type(in(1)->in(1))->isa_int();
+ if( !t11 ) return this;
+ // Does actual value fit inside of mask?
+ if( lo <= t11->_lo && t11->_hi <= hi )
+ return in(1)->in(1); // Then shifting is a nop
+ }
+
+ return this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *RShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Inputs may be TOP if they are dead.
+ const TypeInt *t1 = phase->type( in(1) )->isa_int();
+ if( !t1 ) return NULL; // Left input must be an integer
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const TypeInt *t3; // type of in(1).in(2)
+ int shift = t2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+
+ if ( shift == 0 ) return NULL; // let Identity() handle 0 shift count
+
+ // Check for (x & 0xFF000000) >> 24, whose mask can be made smaller.
+ // Such expressions arise normally from shift chains like (byte)(x >> 24).
+ const Node *mask = in(1);
+ if( mask->Opcode() == Op_AndI &&
+ (t3 = phase->type(mask->in(2))->isa_int()) &&
+ t3->is_con() ) {
+ Node *x = mask->in(1);
+ jint maskbits = t3->get_con();
+ // Convert to "(x >> shift) & (mask >> shift)"
+ Node *shr_nomask = phase->transform( new (phase->C, 3) RShiftINode(mask->in(1), in(2)) );
+ return new (phase->C, 3) AndINode(shr_nomask, phase->intcon( maskbits >> shift));
+ }
+
+ // Check for "(short[i] <<16)>>16" which simply sign-extends
+ const Node *shl = in(1);
+ if( shl->Opcode() != Op_LShiftI ) return NULL;
+
+ if( shift == 16 &&
+ (t3 = phase->type(shl->in(2))->isa_int()) &&
+ t3->is_con(16) ) {
+ Node *ld = shl->in(1);
+ if( ld->Opcode() == Op_LoadS ) {
+ // Sign extension is just useless here. Return a RShiftI of zero instead
+ // of returning 'ld' directly. We cannot return an old Node directly as
+ // that is the job of 'Identity' calls and Identity calls only work on
+ // direct inputs ('ld' is an extra Node removed from 'this'). The
+ // combined optimization requires Identity only return direct inputs.
+ set_req(1, ld);
+ set_req(2, phase->intcon(0));
+ return this;
+ }
+ else if( ld->Opcode() == Op_LoadC )
+ // Replace zero-extension-load with sign-extension-load
+ return new (phase->C, 3) LoadSNode( ld->in(MemNode::Control),
+ ld->in(MemNode::Memory),
+ ld->in(MemNode::Address),
+ ld->adr_type());
+ }
+
+ // Check for "(byte[i] <<24)>>24" which simply sign-extends
+ if( shift == 24 &&
+ (t3 = phase->type(shl->in(2))->isa_int()) &&
+ t3->is_con(24) ) {
+ Node *ld = shl->in(1);
+ if( ld->Opcode() == Op_LoadB ) {
+ // Sign extension is just useless here
+ set_req(1, ld);
+ set_req(2, phase->intcon(0));
+ return this;
+ }
+ }
+
+ return NULL;
+}
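The signed-right-shift rewrites above can be spot-checked the same way: a signed shift distributes over AND, and shifting left then right by 16 (or 24) is exactly a narrowing to short (or byte) followed by sign extension. A standalone sketch (illustrative only; assumes two's-complement int32_t with arithmetic >>):

#include <cassert>
#include <cstdint>

static int32_t sshr(int32_t x, int s) { return x >> s; }                        // RShiftI
static int32_t shl (int32_t x, int s) { return (int32_t)((uint32_t)x << s); }   // LShiftI

int main() {
  int32_t x = (int32_t)0xCAFEBABE;

  // (x & 0xFF000000) >> 24  ==>  (x >> 24) & (0xFF000000 >> 24)
  int32_t m = (int32_t)0xFF000000;
  assert(sshr(x & m, 24) == (sshr(x, 24) & sshr(m, 24)));

  // (x << 16) >> 16 sign-extends the low 16 bits, i.e. it is (short)x.
  assert(sshr(shl(x, 16), 16) == (int32_t)(int16_t)x);
  // (x << 24) >> 24 likewise is (byte)x.
  assert(sshr(shl(x, 24), 24) == (int32_t)(int8_t)x);
  return 0;
}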
+
+//------------------------------Value------------------------------------------
+// A RShiftINode shifts its input1 right by the input2 amount.
+const Type *RShiftINode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeInt::INT;
+
+ if (t2 == TypeInt::INT)
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ // If the shift is a constant, just shift the bounds of the type.
+ // For example, if the shift is 31, we just propagate sign bits.
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ // This is necessary if we are to correctly type things
+ // like (x<<24>>24) == ((byte)x).
+ jint lo = (jint)r1->_lo >> (jint)shift;
+ jint hi = (jint)r1->_hi >> (jint)shift;
+ assert(lo <= hi, "must have valid bounds");
+ const TypeInt* ti = TypeInt::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+#ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == BitsPerJavaInteger-1) {
+ if (r1->_lo >= 0) assert(ti == TypeInt::ZERO, ">>31 of + is 0");
+ if (r1->_hi < 0) assert(ti == TypeInt::MINUS_1, ">>31 of - is -1");
+ }
+#endif
+ return ti;
+ }
+
+ if( !r1->is_con() || !r2->is_con() )
+ return TypeInt::INT;
+
+ // Signed shift right
+ return TypeInt::make( r1->get_con() >> (r2->get_con()&31) );
+}
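The sign-capture assertion above is the familiar x >> 31 idiom, and shifting the bounds is justified because arithmetic right shift is monotone. A standalone check (illustrative only; assumes arithmetic >> on negative int32_t):

#include <cassert>
#include <cstdint>

int main() {
  // Sign capture: x >> 31 is 0 for any non-negative x and -1 for any negative x,
  // exactly the [0,0] / [-1,-1] typing asserted above.
  assert(( 12345 >> 31) ==  0);
  assert((     0 >> 31) ==  0);
  assert((-12345 >> 31) == -1);

  // Shifting the bounds: for lo <= x <= hi, (x >> s) stays in [lo >> s, hi >> s],
  // because arithmetic right shift (floor division by 2^s) is monotone.
  int32_t lo = -1000, hi = 500;
  int s = 3;
  for (int32_t x = lo; x <= hi; ++x)
    assert((lo >> s) <= (x >> s) && (x >> s) <= (hi >> s));
  return 0;
}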
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *RShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// A RShiftLNode shifts its input1 right by the input2 amount.
+const Type *RShiftLNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeLong::LONG;
+
+ if (t2 == TypeInt::INT)
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int (); // Handy access
+
+ // If the shift is a constant, just shift the bounds of the type.
+ // For example, if the shift is 63, we just propagate sign bits.
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= (2*BitsPerJavaInteger)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ // This is necessary if we are to correctly type things
+ // like (x<<24>>24) == ((byte)x).
+ jlong lo = (jlong)r1->_lo >> (jlong)shift;
+ jlong hi = (jlong)r1->_hi >> (jlong)shift;
+ assert(lo <= hi, "must have valid bounds");
+ const TypeLong* tl = TypeLong::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == (2*BitsPerJavaInteger)-1) {
+ if (r1->_lo >= 0) assert(tl == TypeLong::ZERO, ">>63 of + is 0");
+ if (r1->_hi < 0) assert(tl == TypeLong::MINUS_1, ">>63 of - is -1");
+ }
+ #endif
+ return tl;
+ }
+
+ return TypeLong::LONG; // Give up
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *URShiftINode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int();
+ if ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerInt - 1 ) ) == 0 ) return in(1);
+
+ // Check for "((x << LogBytesPerWord) + (wordSize-1)) >> LogBytesPerWord" which is just "x".
+ // Happens during new-array length computation.
+ // Safe if 'x' is in the range [0..(max_int>>LogBytesPerWord)]
+ Node *add = in(1);
+ if( add->Opcode() == Op_AddI ) {
+ const TypeInt *t2 = phase->type(add->in(2))->isa_int();
+ if( t2 && t2->is_con(wordSize - 1) &&
+ add->in(1)->Opcode() == Op_LShiftI ) {
+ // Check that shift_counts are LogBytesPerWord
+ Node *lshift_count = add->in(1)->in(2);
+ const TypeInt *t_lshift_count = phase->type(lshift_count)->isa_int();
+ if( t_lshift_count && t_lshift_count->is_con(LogBytesPerWord) &&
+ t_lshift_count == phase->type(in(2)) ) {
+ Node *x = add->in(1)->in(1);
+ const TypeInt *t_x = phase->type(x)->isa_int();
+ if( t_x != NULL && 0 <= t_x->_lo && t_x->_hi <= (max_jint>>LogBytesPerWord) ) {
+ return x;
+ }
+ }
+ }
+ }
+
+ return (phase->type(in(2))->higher_equal(TypeInt::ZERO)) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *URShiftINode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & 31; // Shift count is always masked
+ if ( con == 0 ) return NULL; // let Identity() handle a 0 shift count
+ // We'll want a mask covering the low (32-con) bits that survive the shift
+ const int mask = right_n_bits(BitsPerJavaInteger - con);
+
+ int in1_op = in(1)->Opcode();
+
+ // Check for ((x>>>a)>>>b) and replace with (x>>>(a+b)) when a+b < 32
+ if( in1_op == Op_URShiftI ) {
+ const TypeInt *t12 = phase->type( in(1)->in(2) )->isa_int();
+ if( t12 && t12->is_con() ) { // Right input is a constant
+ assert( in(1) != in(1)->in(1), "dead loop in URShiftINode::Ideal" );
+ const int con2 = t12->get_con() & 31; // Shift count is always masked
+ const int con3 = con+con2;
+ if( con3 < 32 ) // Only merge shifts if total is < 32
+ return new (phase->C, 3) URShiftINode( in(1)->in(1), phase->intcon(con3) );
+ }
+ }
+
+ // Check for ((x << z) + Y) >>> z. Replace with (x + (Y>>>z)) & z-mask
+ // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
+ // If Q is "X << z" the rounding is useless. Look for patterns like
+ // ((X<<Z) + Y) >>> Z and replace with (X + Y>>>Z) & Z-mask.
+ Node *add = in(1);
+ if( in1_op == Op_AddI ) {
+ Node *lshl = add->in(1);
+ if( lshl->Opcode() == Op_LShiftI &&
+ phase->type(lshl->in(2)) == t2 ) {
+ Node *y_z = phase->transform( new (phase->C, 3) URShiftINode(add->in(2),in(2)) );
+ Node *sum = phase->transform( new (phase->C, 3) AddINode( lshl->in(1), y_z ) );
+ return new (phase->C, 3) AndINode( sum, phase->intcon(mask) );
+ }
+ }
+
+ // Check for (x & mask) >>> z. Replace with (x >>> z) & (mask >>> z)
+ // This shortens the mask. Also, if we are extracting a high byte and
+ // storing it to a buffer, the mask will be removed completely.
+ Node *andi = in(1);
+ if( in1_op == Op_AndI ) {
+ const TypeInt *t3 = phase->type( andi->in(2) )->isa_int();
+ if( t3 && t3->is_con() ) { // Right input is a constant
+ jint mask2 = t3->get_con();
+ mask2 >>= con; // *signed* shift downward (high-order zeroes do not help)
+ Node *newshr = phase->transform( new (phase->C, 3) URShiftINode(andi->in(1), in(2)) );
+ return new (phase->C, 3) AndINode(newshr, phase->intcon(mask2));
+ // The negative values are easier to materialize than positive ones.
+ // A typical case from address arithmetic is ((x & ~15) >> 4).
+ // It's better to change that to ((x >> 4) & ~0) versus
+ // ((x >> 4) & 0x0FFFFFFF). The difference is greatest in LP64.
+ }
+ }
+
+ // Check for "(X << z ) >>> z" which simply zero-extends
+ Node *shl = in(1);
+ if( in1_op == Op_LShiftI &&
+ phase->type(shl->in(2)) == t2 )
+ return new (phase->C, 3) AndINode( shl->in(1), phase->intcon(mask) );
+
+ return NULL;
+}
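All four unsigned-right-shift rewrites above (shift merging, the rounding idiom, mask shortening, and zero extension) are identities in wrapping 32-bit arithmetic. A standalone sketch (illustrative only, not part of this patch; assumes two's-complement int32_t):

#include <cassert>
#include <cstdint>

static uint32_t U(int32_t x) { return (uint32_t)x; }
static int32_t ushr(int32_t x, int s) { return (int32_t)(U(x) >> s); }          // URShiftI
static int32_t shl (int32_t x, int s) { return (int32_t)(U(x) << s); }          // LShiftI
static int32_t addw(int32_t a, int32_t b) { return (int32_t)(U(a) + U(b)); }    // AddI

int main() {
  int32_t x = 0x1234ABCD, y = 0x07654321;
  int z = 4;
  int32_t zmask = (int32_t)(0xFFFFFFFFu >> z);     // 2^(32-z) - 1

  // Shift merging: (x >>> a) >>> b == x >>> (a+b) when a+b < 32.
  assert(ushr(ushr(x, 9), 7) == ushr(x, 16));

  // Rounding idiom: ((X << z) + Y) >>> z == (X + (Y >>> z)) & zmask.
  assert(ushr(addw(shl(x, z), y), z) == (addw(x, ushr(y, z)) & zmask));

  // Mask shortening: (x & m) >>> z == (x >>> z) & (m >>> z).
  int32_t m = (int32_t)0xFFFF0000;
  assert(ushr(x & m, z) == (ushr(x, z) & ushr(m, z)));

  // Zero extension: (X << z) >>> z == X & zmask.
  assert(ushr(shl(x, z), z) == (x & zmask));
  return 0;
}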
+
+//------------------------------Value------------------------------------------
+// A URShiftINode shifts its input1 right by the input2 amount.
+const Type *URShiftINode::Value( PhaseTransform *phase ) const {
+ // (This is a near clone of RShiftINode::Value.)
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeInt::ZERO ) return TypeInt::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeInt::INT;
+
+ if (t2 == TypeInt::INT)
+ return TypeInt::INT;
+
+ const TypeInt *r1 = t1->is_int(); // Handy access
+ const TypeInt *r2 = t2->is_int(); // Handy access
+
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= BitsPerJavaInteger-1; // semantics of Java shifts
+ // Shift by a multiple of 32 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ jint lo = (juint)r1->_lo >> (juint)shift;
+ jint hi = (juint)r1->_hi >> (juint)shift;
+ if (r1->_hi >= 0 && r1->_lo < 0) {
+ // If the type has both negative and positive values,
+ // there are two separate sub-domains to worry about:
+ // The positive half and the negative half.
+ jint neg_lo = lo;
+ jint neg_hi = (juint)-1 >> (juint)shift;
+ jint pos_lo = (juint) 0 >> (juint)shift;
+ jint pos_hi = hi;
+ lo = MIN2(neg_lo, pos_lo); // == 0
+ hi = MAX2(neg_hi, pos_hi); // == -1 >>> shift;
+ }
+ assert(lo <= hi, "must have valid bounds");
+ const TypeInt* ti = TypeInt::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == BitsPerJavaInteger-1) {
+ if (r1->_lo >= 0) assert(ti == TypeInt::ZERO, ">>>31 of + is 0");
+ if (r1->_hi < 0) assert(ti == TypeInt::ONE, ">>>31 of - is +1");
+ }
+ #endif
+ return ti;
+ }
+
+ //
+ // Do not support shifted oops in info for GC
+ //
+ // else if( t1->base() == Type::InstPtr ) {
+ //
+ // const TypeInstPtr *o = t1->is_instptr();
+ // if( t1->singleton() )
+ // return TypeInt::make( ((uint32)o->const_oop() + o->_offset) >> shift );
+ // }
+ // else if( t1->base() == Type::KlassPtr ) {
+ // const TypeKlassPtr *o = t1->is_klassptr();
+ // if( t1->singleton() )
+ // return TypeInt::make( ((uint32)o->const_oop() + o->_offset) >> shift );
+ // }
+
+ return TypeInt::INT;
+}
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+Node *URShiftLNode::Identity( PhaseTransform *phase ) {
+ const TypeInt *ti = phase->type( in(2) )->isa_int(); // shift count is an int
+ return ( ti && ti->is_con() && ( ti->get_con() & ( BitsPerLong - 1 ) ) == 0 ) ? in(1) : this;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *URShiftLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const TypeInt *t2 = phase->type( in(2) )->isa_int();
+ if( !t2 || !t2->is_con() ) return NULL; // Right input must be a constant
+ const int con = t2->get_con() & ( BitsPerLong - 1 ); // Shift count is always masked
+ if ( con == 0 ) return NULL; // let Identity() handle a 0 shift count
+ // note: mask computation below does not work for 0 shift count
+ // We'll want a mask covering the low (64-con) bits that survive the shift
+ const jlong mask = (((jlong)CONST64(1) << (jlong)(BitsPerJavaInteger*2 - con)) -1);
+
+ // Check for ((x << z) + Y) >>> z. Replace with (x + (Y>>>z)) & z-mask
+ // The idiom for rounding to a power of 2 is "(Q+(2^z-1)) >>> z".
+ // If Q is "X << z" the rounding is useless. Look for patterns like
+ // ((X<<Z) + Y) >>> Z and replace with (X + Y>>>Z) & Z-mask.
+ Node *add = in(1);
+ if( add->Opcode() == Op_AddL ) {
+ Node *lshl = add->in(1);
+ if( lshl->Opcode() == Op_LShiftL &&
+ phase->type(lshl->in(2)) == t2 ) {
+ Node *y_z = phase->transform( new (phase->C, 3) URShiftLNode(add->in(2),in(2)) );
+ Node *sum = phase->transform( new (phase->C, 3) AddLNode( lshl->in(1), y_z ) );
+ return new (phase->C, 3) AndLNode( sum, phase->longcon(mask) );
+ }
+ }
+
+ // Check for (x & mask) >>> z. Replace with (x >>> z) & (mask >>> z)
+ // This shortens the mask. Also, if we are extracting a high byte and
+ // storing it to a buffer, the mask will be removed completely.
+ Node *andi = in(1);
+ if( andi->Opcode() == Op_AndL ) {
+ const TypeLong *t3 = phase->type( andi->in(2) )->isa_long();
+ if( t3 && t3->is_con() ) { // Right input is a constant
+ jlong mask2 = t3->get_con();
+ mask2 >>= con; // *signed* shift downward (high-order zeroes do not help)
+ Node *newshr = phase->transform( new (phase->C, 3) URShiftLNode(andi->in(1), in(2)) );
+ return new (phase->C, 3) AndLNode(newshr, phase->longcon(mask2));
+ }
+ }
+
+ // Check for "(X << z ) >>> z" which simply zero-extends
+ Node *shl = in(1);
+ if( shl->Opcode() == Op_LShiftL &&
+ phase->type(shl->in(2)) == t2 )
+ return new (phase->C, 3) AndLNode( shl->in(1), phase->longcon(mask) );
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// A URShiftLNode shifts its input1 right by the input2 amount.
+const Type *URShiftLNode::Value( PhaseTransform *phase ) const {
+ // (This is a near clone of RShiftLNode::Value.)
+ const Type *t1 = phase->type( in(1) );
+ const Type *t2 = phase->type( in(2) );
+ // Either input is TOP ==> the result is TOP
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Left input is ZERO ==> the result is ZERO.
+ if( t1 == TypeLong::ZERO ) return TypeLong::ZERO;
+ // Shift by zero does nothing
+ if( t2 == TypeInt::ZERO ) return t1;
+
+ // Either input is BOTTOM ==> the result is BOTTOM
+ if (t1 == Type::BOTTOM || t2 == Type::BOTTOM)
+ return TypeLong::LONG;
+
+ if (t2 == TypeInt::INT)
+ return TypeLong::LONG;
+
+ const TypeLong *r1 = t1->is_long(); // Handy access
+ const TypeInt *r2 = t2->is_int (); // Handy access
+
+ if (r2->is_con()) {
+ uint shift = r2->get_con();
+ shift &= (2*BitsPerJavaInteger)-1; // semantics of Java shifts
+ // Shift by a multiple of 64 does nothing:
+ if (shift == 0) return t1;
+ // Calculate reasonably aggressive bounds for the result.
+ jlong lo = (julong)r1->_lo >> (juint)shift;
+ jlong hi = (julong)r1->_hi >> (juint)shift;
+ if (r1->_hi >= 0 && r1->_lo < 0) {
+ // If the type has both negative and positive values,
+ // there are two separate sub-domains to worry about:
+ // The positive half and the negative half.
+ jlong neg_lo = lo;
+ jlong neg_hi = (julong)-1 >> (juint)shift;
+ jlong pos_lo = (julong) 0 >> (juint)shift;
+ jlong pos_hi = hi;
+ //lo = MIN2(neg_lo, pos_lo); // == 0
+ lo = neg_lo < pos_lo ? neg_lo : pos_lo;
+ //hi = MAX2(neg_hi, pos_hi); // == -1 >>> shift;
+ hi = neg_hi > pos_hi ? neg_hi : pos_hi;
+ }
+ assert(lo <= hi, "must have valid bounds");
+ const TypeLong* tl = TypeLong::make(lo, hi, MAX2(r1->_widen,r2->_widen));
+ #ifdef ASSERT
+ // Make sure we get the sign-capture idiom correct.
+ if (shift == (2*BitsPerJavaInteger)-1) {
+ if (r1->_lo >= 0) assert(tl == TypeLong::ZERO, ">>>63 of + is 0");
+ if (r1->_hi < 0) assert(tl == TypeLong::ONE, ">>>63 of - is +1");
+ }
+ #endif
+ return tl;
+ }
+
+ return TypeLong::LONG; // Give up
+}
diff --git a/src/share/vm/opto/mulnode.hpp b/src/share/vm/opto/mulnode.hpp
new file mode 100644
index 000000000..380e35a89
--- /dev/null
+++ b/src/share/vm/opto/mulnode.hpp
@@ -0,0 +1,247 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+class PhaseTransform;
+
+//------------------------------MulNode----------------------------------------
+// Classic MULTIPLY functionality. This covers all the usual 'multiply'
+// behaviors for an algebraic ring. Multiply-integer, multiply-float,
+// multiply-double, and binary-and all derive from this class. The
+// various identity values are supplied by virtual functions.
+class MulNode : public Node {
+ virtual uint hash() const;
+public:
+ MulNode( Node *in1, Node *in2 ): Node(0,in1,in2) {
+ init_class_id(Class_Mul);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "multiply by one" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // We also canonicalize the Node, moving constants to the right input,
+ // and flatten expressions (so that 1+x+2 becomes x+3).
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual mul_ring() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Supplied function returns the product of the inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ // This call recognizes the multiplicative zero type.
+ virtual const Type *mul_ring( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the multiplicative identity type
+ virtual const Type *mul_id() const = 0;
+
+ // Supplied function to return the additive identity type
+ virtual const Type *add_id() const = 0;
+
+ // Supplied function to return the additive opcode
+ virtual int add_opcode() const = 0;
+
+ // Supplied function to return the multiplicative opcode
+ virtual int mul_opcode() const = 0;
+
+};
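The reason AND can reuse all of this multiply machinery is that bitwise AND on fixed-width integers behaves like the ring multiply, with -1 (all bits set) as the multiplicative identity and 0 as the multiplicative zero; those are exactly the values the AndINode/AndLNode overrides below return from mul_id() and add_id(). A standalone illustration (not HotSpot code):

#include <cassert>
#include <cstdint>

int main() {
  int32_t x = 0x5A5A5A5A;

  // Ordinary multiply ring: identity 1, zero 0.
  assert(x * 1 == x);
  assert(x * 0 == 0);

  // The "AND ring" used by AndINode: identity -1, zero 0, with OR as the addition.
  assert((x & -1) == x);    // mul_id() == TypeInt::MINUS_1
  assert((x &  0) == 0);    // add_id() == TypeInt::ZERO acts as the multiplicative zero
  assert((x |  0) == x);    // ...and is also the identity for the OR "addition"
  return 0;
}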
+
+//------------------------------MulINode---------------------------------------
+// Multiply 2 integers
+class MulINode : public MulNode {
+public:
+ MulINode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeInt::ONE; }
+ const Type *add_id() const { return TypeInt::ZERO; }
+ int add_opcode() const { return Op_AddI; }
+ int mul_opcode() const { return Op_MulI; }
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------MulLNode---------------------------------------
+// Multiply 2 longs
+class MulLNode : public MulNode {
+public:
+ MulLNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeLong::ONE; }
+ const Type *add_id() const { return TypeLong::ZERO; }
+ int add_opcode() const { return Op_AddL; }
+ int mul_opcode() const { return Op_MulL; }
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+
+//------------------------------MulFNode---------------------------------------
+// Multiply 2 floats
+class MulFNode : public MulNode {
+public:
+ MulFNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeF::ONE; }
+ const Type *add_id() const { return TypeF::ZERO; }
+ int add_opcode() const { return Op_AddF; }
+ int mul_opcode() const { return Op_MulF; }
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------MulDNode---------------------------------------
+// Multiply 2 doubles
+class MulDNode : public MulNode {
+public:
+ MulDNode( Node *in1, Node *in2 ) : MulNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeD::ONE; }
+ const Type *add_id() const { return TypeD::ZERO; }
+ int add_opcode() const { return Op_AddD; }
+ int mul_opcode() const { return Op_MulD; }
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------AndINode---------------------------------------
+// Logically AND 2 integers. Included with the MUL nodes because it inherits
+// all the behavior of multiplication on a ring.
+class AndINode : public MulINode {
+public:
+ AndINode( Node *in1, Node *in2 ) : MulINode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeInt::MINUS_1; }
+ const Type *add_id() const { return TypeInt::ZERO; }
+ int add_opcode() const { return Op_OrI; }
+ int mul_opcode() const { return Op_AndI; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AndLNode---------------------------------------
+// Logically AND 2 longs. Included with the MUL nodes because it inherits
+// all the behavior of multiplication on a ring.
+class AndLNode : public MulLNode {
+public:
+ AndLNode( Node *in1, Node *in2 ) : MulLNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *mul_ring( const Type *, const Type * ) const;
+ const Type *mul_id() const { return TypeLong::MINUS_1; }
+ const Type *add_id() const { return TypeLong::ZERO; }
+ int add_opcode() const { return Op_OrL; }
+ int mul_opcode() const { return Op_AndL; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------LShiftINode------------------------------------
+// Logical shift left
+class LShiftINode : public Node {
+public:
+ LShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------LShiftLNode------------------------------------
+// Logical shift left
+class LShiftLNode : public Node {
+public:
+ LShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------RShiftINode------------------------------------
+// Signed shift right
+class RShiftINode : public Node {
+public:
+ RShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------RShiftLNode------------------------------------
+// Signed shift right
+class RShiftLNode : public Node {
+public:
+ RShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+
+//------------------------------URShiftINode-----------------------------------
+// Logical shift right
+class URShiftINode : public Node {
+public:
+ URShiftINode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------URShiftLNode-----------------------------------
+// Logical shift right
+class URShiftLNode : public Node {
+public:
+ URShiftLNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Identity( PhaseTransform *phase );
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
diff --git a/src/share/vm/opto/multnode.cpp b/src/share/vm/opto/multnode.cpp
new file mode 100644
index 000000000..5caa3dd8f
--- /dev/null
+++ b/src/share/vm/opto/multnode.cpp
@@ -0,0 +1,129 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_multnode.cpp.incl"
+
+//=============================================================================
+//------------------------------MultiNode--------------------------------------
+const RegMask &MultiNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+Node *MultiNode::match( const ProjNode *proj, const Matcher *m ) { return proj->clone(); }
+
+//------------------------------proj_out---------------------------------------
+// Get a named projection
+ProjNode* MultiNode::proj_out(uint which_proj) const {
+ assert(Opcode() != Op_If || which_proj == (uint)true || which_proj == (uint)false, "must be 1 or 0");
+ assert(Opcode() != Op_If || outcnt() == 2, "bad if #1");
+ for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
+ Node *p = fast_out(i);
+ if( !p->is_Proj() ) {
+ assert(p == this && this->is_Start(), "else must be proj");
+ continue;
+ }
+ ProjNode *proj = p->as_Proj();
+ if( proj->_con == which_proj ) {
+ assert(Opcode() != Op_If || proj->Opcode() == (which_proj?Op_IfTrue:Op_IfFalse), "bad if #2");
+ return proj;
+ }
+ }
+ return NULL;
+}
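+// Illustrative usage (a sketch, not a caller in this change): for an If node
+// the two control projections can be fetched by their constant, matching the
+// assertions above; either call may return NULL if the projection is unused.
+//   ProjNode *taken     = iff->proj_out(1);   // the IfTrue  projection
+//   ProjNode *not_taken = iff->proj_out(0);   // the IfFalse projection
+// Here 'iff' stands for any If node already in the graph.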
+
+//=============================================================================
+//------------------------------ProjNode---------------------------------------
+uint ProjNode::hash() const {
+ // only one input
+ return (uintptr_t)in(TypeFunc::Control) + (_con << 1) + (_is_io_use ? 1 : 0);
+}
+uint ProjNode::cmp( const Node &n ) const { return _con == ((ProjNode&)n)._con && ((ProjNode&)n)._is_io_use == _is_io_use; }
+uint ProjNode::size_of() const { return sizeof(ProjNode); }
+
+// Test if we propagate interesting control along this projection
+bool ProjNode::is_CFG() const {
+ Node *def = in(0);
+ return (_con == TypeFunc::Control && def->is_CFG());
+}
+
+const Type *ProjNode::bottom_type() const {
+ if (in(0) == NULL) return Type::TOP;
+ const Type *tb = in(0)->bottom_type();
+ if( tb == Type::TOP ) return Type::TOP;
+ if( tb == Type::BOTTOM ) return Type::BOTTOM;
+ const TypeTuple *t = tb->is_tuple();
+ return t->field_at(_con);
+}
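+// Worked example (illustrative): a multi-valued def such as a Call produces a
+// TypeTuple, and each projection simply selects one field of it, so
+//   proj->bottom_type() == def->bottom_type()->is_tuple()->field_at(proj->_con)
+// whenever the def's type is neither TOP nor BOTTOM.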
+
+const TypePtr *ProjNode::adr_type() const {
+ if (bottom_type() == Type::MEMORY) {
+ // in(0) might be a narrow MemBar; otherwise we will report TypePtr::BOTTOM
+ const TypePtr* adr_type = in(0)->adr_type();
+ #ifdef ASSERT
+ if (!is_error_reported() && !Node::in_dump())
+ assert(adr_type != NULL, "source must have adr_type");
+ #endif
+ return adr_type;
+ }
+ assert(bottom_type()->base() != Type::Memory, "no other memories?");
+ return NULL;
+}
+
+bool ProjNode::pinned() const { return in(0)->pinned(); }
+#ifndef PRODUCT
+void ProjNode::dump_spec(outputStream *st) const { st->print("#%d",_con); if(_is_io_use) st->print(" (i_o_use)");}
+#endif
+
+//----------------------------check_con----------------------------------------
+void ProjNode::check_con() const {
+ Node* n = in(0);
+ if (n == NULL) return; // should be assert, but NodeHash makes bogons
+ if (n->is_Mach()) return; // mach. projs. are not type-safe
+ if (n->is_Start()) return; // alas, starts can have mach. projs. also
+ if (_con == SCMemProjNode::SCMEMPROJCON ) return;
+ const Type* t = n->bottom_type();
+ if (t == Type::TOP) return; // multi is dead
+ assert(_con < t->is_tuple()->cnt(), "ProjNode::_con must be in range");
+}
+
+//------------------------------Value------------------------------------------
+const Type *ProjNode::Value( PhaseTransform *phase ) const {
+ if( !in(0) ) return Type::TOP;
+ const Type *t = phase->type(in(0));
+ if( t == Type::TOP ) return t;
+ if( t == Type::BOTTOM ) return t;
+ return t->is_tuple()->field_at(_con);
+}
+
+//------------------------------out_RegMask------------------------------------
+// Pass the buck uphill
+const RegMask &ProjNode::out_RegMask() const {
+ return RegMask::Empty;
+}
+
+//------------------------------ideal_reg--------------------------------------
+uint ProjNode::ideal_reg() const {
+ return Matcher::base2reg[bottom_type()->base()];
+}
diff --git a/src/share/vm/opto/multnode.hpp b/src/share/vm/opto/multnode.hpp
new file mode 100644
index 000000000..34a573ffc
--- /dev/null
+++ b/src/share/vm/opto/multnode.hpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Matcher;
+class ProjNode;
+
+//------------------------------MultiNode--------------------------------------
+// This class defines a MultiNode, a Node which produces many values. The
+// values are wrapped up in a tuple Type, i.e. a TypeTuple.
+class MultiNode : public Node {
+public:
+ MultiNode( uint required ) : Node(required) {
+ init_class_id(Class_Multi);
+ }
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const = 0;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const RegMask &out_RegMask() const;
+ virtual Node *match( const ProjNode *proj, const Matcher *m );
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ ProjNode* proj_out(uint which_proj) const; // Get a named projection
+
+};
+
+//------------------------------ProjNode---------------------------------------
+// This class defines a Projection node. Projections project a single element
+// out of a tuple (or Signature) type. Only MultiNodes produce TypeTuple
+// results.
+class ProjNode : public Node {
+protected:
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const;
+ void check_con() const; // Called from constructor.
+
+public:
+ ProjNode( Node *src, uint con, bool io_use = false )
+ : Node( src ), _con(con), _is_io_use(io_use)
+ {
+ init_class_id(Class_Proj);
+ debug_only(check_con());
+ }
+ const uint _con; // The field in the tuple we are projecting
+ const bool _is_io_use; // Used to distinguish between the projections
+ // used on the control and io paths from a macro node
+ virtual int Opcode() const;
+ virtual bool is_CFG() const;
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Type *bottom_type() const;
+ virtual const TypePtr *adr_type() const;
+ virtual bool pinned() const;
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual uint ideal_reg() const;
+ virtual const RegMask &out_RegMask() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
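+// Illustrative picture (not part of the original header): a MultiNode and its
+// ProjNodes form a fan-out in which each projection names one tuple field:
+//
+//                MultiNode                bottom_type() is a TypeTuple
+//               /         \
+//     ProjNode(_con=0)  ProjNode(_con=1)  each reports field_at(_con)
+//
+// Concrete pairs in the graph include If with its IfTrue/IfFalse projections
+// and Call with its control, i_o and memory projections.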
diff --git a/src/share/vm/opto/node.cpp b/src/share/vm/opto/node.cpp
new file mode 100644
index 000000000..d7563d611
--- /dev/null
+++ b/src/share/vm/opto/node.cpp
@@ -0,0 +1,1919 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_node.cpp.incl"
+
+class RegMask;
+// #include "phase.hpp"
+class PhaseTransform;
+class PhaseGVN;
+
+// Arena we are currently building Nodes in
+const uint Node::NotAMachineReg = 0xffff0000;
+
+#ifndef PRODUCT
+extern int nodes_created;
+#endif
+
+#ifdef ASSERT
+
+//-------------------------- construct_node------------------------------------
+// Set a breakpoint here to identify where a particular node index is built.
+void Node::verify_construction() {
+ _debug_orig = NULL;
+ int old_debug_idx = Compile::debug_idx();
+ int new_debug_idx = old_debug_idx+1;
+ if (new_debug_idx > 0) {
+ // Arrange that the lowest five decimal digits of _debug_idx
+ // will repeat those of _idx. In case this is somehow pathological,
+ // we continue to assign negative numbers (!) consecutively.
+ const int mod = 100000;
+ int bump = (int)(_idx - new_debug_idx) % mod;
+ if (bump < 0) bump += mod;
+ assert(bump >= 0 && bump < mod, "");
+ new_debug_idx += bump;
+ }
+ Compile::set_debug_idx(new_debug_idx);
+ set_debug_idx( new_debug_idx );
+ assert(Compile::current()->unique() < (uint)MaxNodeLimit, "Node limit exceeded");
+ if (BreakAtNode != 0 && (_debug_idx == BreakAtNode || (int)_idx == BreakAtNode)) {
+ tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d", _idx, _debug_idx);
+ BREAKPOINT;
+ }
+#if OPTO_DU_ITERATOR_ASSERT
+ _last_del = NULL;
+ _del_tick = 0;
+#endif
+ _hash_lock = 0;
+}
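+// Worked example of the digit-matching scheme above (illustrative numbers):
+// with _idx == 42 and a starting new_debug_idx of 399951, bump is
+//   (42 - 399951) % 100000 == -99909, corrected to 91 by adding mod,
+// giving _debug_idx == 400042, whose lowest five digits (00042) repeat _idx.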
+
+
+// #ifdef ASSERT ...
+
+#if OPTO_DU_ITERATOR_ASSERT
+void DUIterator_Common::sample(const Node* node) {
+ _vdui = VerifyDUIterators;
+ _node = node;
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+ _last = NULL;
+}
+
+void DUIterator_Common::verify(const Node* node, bool at_end_ok) {
+ assert(_node == node, "consistent iterator source");
+ assert(_del_tick == node->_del_tick, "no unexpected deletions allowed");
+}
+
+void DUIterator_Common::verify_resync() {
+ // Ensure that the loop body has just deleted the last guy produced.
+ const Node* node = _node;
+ // Ensure that at least one copy of the last-seen edge was deleted.
+ // Note: It is OK to delete multiple copies of the last-seen edge.
+ // Unfortunately, we have no way to verify that all the deletions delete
+ // that same edge. On this point we must use the Honor System.
+ assert(node->_del_tick >= _del_tick+1, "must have deleted an edge");
+ assert(node->_last_del == _last, "must have deleted the edge just produced");
+ // We liked this deletion, so accept the resulting outcnt and tick.
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+}
+
+void DUIterator_Common::reset(const DUIterator_Common& that) {
+ if (this == &that) return; // ignore assignment to self
+ if (!_vdui) {
+ // We need to initialize everything, overwriting garbage values.
+ _last = that._last;
+ _vdui = that._vdui;
+ }
+ // Note: It is legal (though odd) for an iterator over some node x
+ // to be reassigned to iterate over another node y. Some doubly-nested
+ // progress loops depend on being able to do this.
+ const Node* node = that._node;
+ // Re-initialize everything, except _last.
+ _node = node;
+ _outcnt = node->_outcnt;
+ _del_tick = node->_del_tick;
+}
+
+void DUIterator::sample(const Node* node) {
+ DUIterator_Common::sample(node); // Initialize the assertion data.
+ _refresh_tick = 0; // No refreshes have happened, as yet.
+}
+
+void DUIterator::verify(const Node* node, bool at_end_ok) {
+ DUIterator_Common::verify(node, at_end_ok);
+ assert(_idx < node->_outcnt + (uint)at_end_ok, "idx in range");
+}
+
+void DUIterator::verify_increment() {
+ if (_refresh_tick & 1) {
+ // We have refreshed the index during this loop.
+ // Fix up _idx to meet asserts.
+ if (_idx > _outcnt) _idx = _outcnt;
+ }
+ verify(_node, true);
+}
+
+void DUIterator::verify_resync() {
+ // Note: We do not assert on _outcnt, because insertions are OK here.
+ DUIterator_Common::verify_resync();
+ // Make sure we are still in sync, possibly with no more out-edges:
+ verify(_node, true);
+}
+
+void DUIterator::reset(const DUIterator& that) {
+ if (this == &that) return; // self assignment is always a no-op
+ assert(that._refresh_tick == 0, "assign only the result of Node::outs()");
+ assert(that._idx == 0, "assign only the result of Node::outs()");
+ assert(_idx == that._idx, "already assigned _idx");
+ if (!_vdui) {
+ // We need to initialize everything, overwriting garbage values.
+ sample(that._node);
+ } else {
+ DUIterator_Common::reset(that);
+ if (_refresh_tick & 1) {
+ _refresh_tick++; // Clear the "was refreshed" flag.
+ }
+ assert(_refresh_tick < 2*100000, "DU iteration must converge quickly");
+ }
+}
+
+void DUIterator::refresh() {
+ DUIterator_Common::sample(_node); // Re-fetch assertion data.
+ _refresh_tick |= 1; // Set the "was refreshed" flag.
+}
+
+void DUIterator::verify_finish() {
+ // If the loop has killed the node, do not require it to re-run.
+ if (_node->_outcnt == 0) _refresh_tick &= ~1;
+ // If this assert triggers, it means that a loop used refresh_out_pos
+ // to re-synch an iteration index, but the loop did not correctly
+ // re-run itself, using a "while (progress)" construct.
+ // This iterator enforces the rule that you must keep trying the loop
+ // until it "runs clean" without any need for refreshing.
+ assert(!(_refresh_tick & 1), "the loop must run once with no refreshing");
+}
+
+
+void DUIterator_Fast::verify(const Node* node, bool at_end_ok) {
+ DUIterator_Common::verify(node, at_end_ok);
+ Node** out = node->_out;
+ uint cnt = node->_outcnt;
+ assert(cnt == _outcnt, "no insertions allowed");
+ assert(_outp >= out && _outp <= out + cnt - !at_end_ok, "outp in range");
+ // This last check is carefully designed to work for NO_OUT_ARRAY.
+}
+
+void DUIterator_Fast::verify_limit() {
+ const Node* node = _node;
+ verify(node, true);
+ assert(_outp == node->_out + node->_outcnt, "limit still correct");
+}
+
+void DUIterator_Fast::verify_resync() {
+ const Node* node = _node;
+ if (_outp == node->_out + _outcnt) {
+ // Note that the limit imax, not the pointer i, gets updated with the
+ // exact count of deletions. (For the pointer it's always "--i".)
+ assert(node->_outcnt+node->_del_tick == _outcnt+_del_tick, "no insertions allowed with deletion(s)");
+ // This is a limit pointer, with a name like "imax".
+ // Fudge the _last field so that the common assert will be happy.
+ _last = (Node*) node->_last_del;
+ DUIterator_Common::verify_resync();
+ } else {
+ assert(node->_outcnt < _outcnt, "no insertions allowed with deletion(s)");
+ // A normal internal pointer.
+ DUIterator_Common::verify_resync();
+ // Make sure we are still in sync, possibly with no more out-edges:
+ verify(node, true);
+ }
+}
+
+void DUIterator_Fast::verify_relimit(uint n) {
+ const Node* node = _node;
+ assert((int)n > 0, "use imax -= n only with a positive count");
+ // This must be a limit pointer, with a name like "imax".
+ assert(_outp == node->_out + node->_outcnt, "apply -= only to a limit (imax)");
+ // The reported number of deletions must match what the node saw.
+ assert(node->_del_tick == _del_tick + n, "must have deleted n edges");
+ // Fudge the _last field so that the common assert will be happy.
+ _last = (Node*) node->_last_del;
+ DUIterator_Common::verify_resync();
+}
+
+void DUIterator_Fast::reset(const DUIterator_Fast& that) {
+ assert(_outp == that._outp, "already assigned _outp");
+ DUIterator_Common::reset(that);
+}
+
+void DUIterator_Last::verify(const Node* node, bool at_end_ok) {
+ // at_end_ok means the _outp is allowed to underflow by 1
+ _outp += at_end_ok;
+ DUIterator_Fast::verify(node, at_end_ok); // check _del_tick, etc.
+ _outp -= at_end_ok;
+ assert(_outp == (node->_out + node->_outcnt) - 1, "pointer must point to end of nodes");
+}
+
+void DUIterator_Last::verify_limit() {
+ // Do not require the limit address to be resynched.
+ //verify(node, true);
+ assert(_outp == _node->_out, "limit still correct");
+}
+
+void DUIterator_Last::verify_step(uint num_edges) {
+ assert((int)num_edges > 0, "need non-zero edge count for loop progress");
+ _outcnt -= num_edges;
+ _del_tick += num_edges;
+ // Make sure we are still in sync, possibly with no more out-edges:
+ const Node* node = _node;
+ verify(node, true);
+ assert(node->_last_del == _last, "must have deleted the edge just produced");
+}
+
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+
+#endif //ASSERT
+
+
+// This constant used to initialize _out may be any non-null value.
+// The value NULL is reserved for the top node only.
+#define NO_OUT_ARRAY ((Node**)-1)
+
+// This funny expression handshakes with Node::operator new
+// to pull Compile::current out of the new node's _out field,
+// and then calls a subroutine which manages most field
+// initializations. The only one which is tricky is the
+// _idx field, which is const, and so must be initialized
+// by a return value, not an assignment.
+//
+// (Aren't you thankful that Java finals don't require so many tricks?)
+#define IDX_INIT(req) this->Init((req), (Compile*) this->_out)
+#ifdef _MSC_VER // the IDX_INIT hack falls foul of warning C4355
+#pragma warning( disable:4355 ) // 'this' : used in base member initializer list
+#endif
+
+// Out-of-line code from node constructors.
+// Executed only when extra debug info. is being passed around.
+static void init_node_notes(Compile* C, int idx, Node_Notes* nn) {
+ C->set_node_notes_at(idx, nn);
+}
+
+// Shared initialization code.
+inline int Node::Init(int req, Compile* C) {
+ assert(Compile::current() == C, "must use operator new(Compile*)");
+ int idx = C->next_unique();
+
+ // If there are default notes floating around, capture them:
+ Node_Notes* nn = C->default_node_notes();
+ if (nn != NULL) init_node_notes(C, idx, nn);
+
+ // Note: At this point, C is dead,
+ // and we begin to initialize the new Node.
+
+ _cnt = _max = req;
+ _outcnt = _outmax = 0;
+ _class_id = Class_Node;
+ _flags = 0;
+ _out = NO_OUT_ARRAY;
+ return idx;
+}
+
+//------------------------------Node-------------------------------------------
+// Create a Node, with a given number of required edges.
+Node::Node(uint req)
+ : _idx(IDX_INIT(req))
+{
+ assert( req < (uint)(MaxNodeLimit - NodeLimitFudgeFactor), "Input limit exceeded" );
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ if (req == 0) {
+ assert( _in == (Node**)this, "Must not pass arg count to 'new'" );
+ _in = NULL;
+ } else {
+ assert( _in[req-1] == this, "Must pass arg count to 'new'" );
+ Node** to = _in;
+ for(uint i = 0; i < req; i++) {
+ to[i] = NULL;
+ }
+ }
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0)
+ : _idx(IDX_INIT(1))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[0] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1)
+ : _idx(IDX_INIT(2))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[1] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2)
+ : _idx(IDX_INIT(3))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[2] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3)
+ : _idx(IDX_INIT(4))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[3] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3, Node *n4)
+ : _idx(IDX_INIT(5))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[4] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5)
+ : _idx(IDX_INIT(6))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[5] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ assert( is_not_dead(n5), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+ _in[5] = n5; if (n5 != NULL) n5->add_out((Node *)this);
+}
+
+//------------------------------Node-------------------------------------------
+Node::Node(Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5, Node *n6)
+ : _idx(IDX_INIT(7))
+{
+ debug_only( verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Assert we allocated space for input array already
+ assert( _in[6] == this, "Must pass arg count to 'new'" );
+ assert( is_not_dead(n0), "can not use dead node");
+ assert( is_not_dead(n1), "can not use dead node");
+ assert( is_not_dead(n2), "can not use dead node");
+ assert( is_not_dead(n3), "can not use dead node");
+ assert( is_not_dead(n4), "can not use dead node");
+ assert( is_not_dead(n5), "can not use dead node");
+ assert( is_not_dead(n6), "can not use dead node");
+ _in[0] = n0; if (n0 != NULL) n0->add_out((Node *)this);
+ _in[1] = n1; if (n1 != NULL) n1->add_out((Node *)this);
+ _in[2] = n2; if (n2 != NULL) n2->add_out((Node *)this);
+ _in[3] = n3; if (n3 != NULL) n3->add_out((Node *)this);
+ _in[4] = n4; if (n4 != NULL) n4->add_out((Node *)this);
+ _in[5] = n5; if (n5 != NULL) n5->add_out((Node *)this);
+ _in[6] = n6; if (n6 != NULL) n6->add_out((Node *)this);
+}
+
+
+//------------------------------clone------------------------------------------
+// Clone a Node.
+Node *Node::clone() const {
+ Compile *compile = Compile::current();
+ uint s = size_of(); // Size of inherited Node
+ Node *n = (Node*)compile->node_arena()->Amalloc_D(size_of() + _max*sizeof(Node*));
+ Copy::conjoint_words_to_lower((HeapWord*)this, (HeapWord*)n, s);
+ // Set the new input pointer array
+ n->_in = (Node**)(((char*)n)+s);
+ // Cannot share the old output pointer array, so kill it
+ n->_out = NO_OUT_ARRAY;
+ // And reset the counters to 0
+ n->_outcnt = 0;
+ n->_outmax = 0;
+ // Unlock this guy, since he is not in any hash table.
+ debug_only(n->_hash_lock = 0);
+ // Walk the old node's input list to duplicate its edges
+ uint i;
+ for( i = 0; i < len(); i++ ) {
+ Node *x = in(i);
+ n->_in[i] = x;
+ if (x != NULL) x->add_out(n);
+ }
+ if (is_macro())
+ compile->add_macro_node(n);
+
+ n->set_idx(compile->next_unique()); // Get new unique index as well
+ debug_only( n->verify_construction() );
+ NOT_PRODUCT(nodes_created++);
+ // Do not patch over the debug_idx of a clone, because it makes it
+ // impossible to break on the clone's moment of creation.
+ //debug_only( n->set_debug_idx( debug_idx() ) );
+
+ compile->copy_node_notes_to(n, (Node*) this);
+
+ // MachNode clone
+ uint nopnds;
+ if (this->is_Mach() && (nopnds = this->as_Mach()->num_opnds()) > 0) {
+ MachNode *mach = n->as_Mach();
+ MachNode *mthis = this->as_Mach();
+ // Get address of _opnd_array.
+ // It should be the same offset since it is the clone of this node.
+ MachOper **from = mthis->_opnds;
+ MachOper **to = (MachOper **)((size_t)(&mach->_opnds) +
+ pointer_delta((const void*)from,
+ (const void*)(&mthis->_opnds), 1));
+ mach->_opnds = to;
+ for ( uint i = 0; i < nopnds; ++i ) {
+ to[i] = from[i]->clone(compile);
+ }
+ }
+ // cloning CallNode may need to clone JVMState
+ if (n->is_Call()) {
+ CallNode *call = n->as_Call();
+ call->clone_jvms();
+ }
+ return n; // Return the clone
+}
+
+//---------------------------setup_is_top--------------------------------------
+// Call this when changing the top node, to reassert the invariants
+// required by Node::is_top. See Compile::set_cached_top_node.
+void Node::setup_is_top() {
+ if (this == (Node*)Compile::current()->top()) {
+ // This node has just become top. Kill its out array.
+ _outcnt = _outmax = 0;
+ _out = NULL; // marker value for top
+ assert(is_top(), "must be top");
+ } else {
+ if (_out == NULL) _out = NO_OUT_ARRAY;
+ assert(!is_top(), "must not be top");
+ }
+}
+
+
+//------------------------------~Node------------------------------------------
+// Fancy destructor; eagerly attempt to reclaim Node numberings and storage
+extern int reclaim_idx ;
+extern int reclaim_in ;
+extern int reclaim_node;
+void Node::destruct() {
+ // Eagerly reclaim unique Node numberings
+ Compile* compile = Compile::current();
+ if ((uint)_idx+1 == compile->unique()) {
+ compile->set_unique(compile->unique()-1);
+#ifdef ASSERT
+ reclaim_idx++;
+#endif
+ }
+ // Clear debug info:
+ Node_Notes* nn = compile->node_notes_at(_idx);
+ if (nn != NULL) nn->clear();
+ // Walk the input array, freeing the corresponding output edges
+ _cnt = _max; // forget req/prec distinction
+ uint i;
+ for( i = 0; i < _max; i++ ) {
+ set_req(i, NULL);
+ //assert(def->out(def->outcnt()-1) == (Node *)this,"bad def-use hacking in reclaim");
+ }
+ assert(outcnt() == 0, "deleting a node must not leave a dangling use");
+ // See if the input array was allocated just prior to the object
+ int edge_size = _max*sizeof(void*);
+ int out_edge_size = _outmax*sizeof(void*);
+ char *edge_end = ((char*)_in) + edge_size;
+ char *out_array = (char*)(_out == NO_OUT_ARRAY? NULL: _out);
+ char *out_edge_end = out_array + out_edge_size;
+ int node_size = size_of();
+
+ // Free the output edge array
+ if (out_edge_size > 0) {
+#ifdef ASSERT
+ if( out_edge_end == compile->node_arena()->hwm() )
+ reclaim_in += out_edge_size; // count reclaimed out edges with in edges
+#endif
+ compile->node_arena()->Afree(out_array, out_edge_size);
+ }
+
+ // Free the input edge array and the node itself
+ if( edge_end == (char*)this ) {
+#ifdef ASSERT
+ if( edge_end+node_size == compile->node_arena()->hwm() ) {
+ reclaim_in += edge_size;
+ reclaim_node+= node_size;
+ }
+#else
+ // It was; free the input array and object all in one hit
+ compile->node_arena()->Afree(_in,edge_size+node_size);
+#endif
+ } else {
+
+ // Free just the input array
+#ifdef ASSERT
+ if( edge_end == compile->node_arena()->hwm() )
+ reclaim_in += edge_size;
+#endif
+ compile->node_arena()->Afree(_in,edge_size);
+
+ // Free just the object
+#ifdef ASSERT
+ if( ((char*)this) + node_size == compile->node_arena()->hwm() )
+ reclaim_node+= node_size;
+#else
+ compile->node_arena()->Afree(this,node_size);
+#endif
+ }
+ if (is_macro()) {
+ compile->remove_macro_node(this);
+ }
+#ifdef ASSERT
+ // We will not actually delete the storage, but we'll make the node unusable.
+ *(address*)this = badAddress; // smash the C++ vtbl, probably
+ _in = _out = (Node**) badAddress;
+ _max = _cnt = _outmax = _outcnt = 0;
+#endif
+}
+
+//------------------------------grow-------------------------------------------
+// Grow the input array, making space for more edges
+void Node::grow( uint len ) {
+ Arena* arena = Compile::current()->node_arena();
+ uint new_max = _max;
+ if( new_max == 0 ) {
+ _max = 4;
+ _in = (Node**)arena->Amalloc(4*sizeof(Node*));
+ Node** to = _in;
+ to[0] = NULL;
+ to[1] = NULL;
+ to[2] = NULL;
+ to[3] = NULL;
+ return;
+ }
+ while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+ // Trimming to limit allows a uint8 to handle up to 255 edges.
+ // Previously I was using only powers-of-2 which peaked at 128 edges.
+ //if( new_max >= limit ) new_max = limit-1;
+ _in = (Node**)arena->Arealloc(_in, _max*sizeof(Node*), new_max*sizeof(Node*));
+ Copy::zero_to_bytes(&_in[_max], (new_max-_max)*sizeof(Node*)); // NULL all new space
+ _max = new_max; // Record new max length
+ // This assertion makes sure that Node::_max is wide enough to
+ // represent the numerical value of new_max.
+ assert(_max == new_max && _max > len, "int width of _max is too small");
+}
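+// Worked example (illustrative): a node whose _max is 4 that is asked to make
+// room for edge index len == 9 doubles 4 -> 8 -> 16, so the input array is
+// reallocated once to 16 slots and the new tail is NULLed by zero_to_bytes above.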
+
+//-----------------------------out_grow----------------------------------------
+// Grow the output array, making space for more out-edges
+void Node::out_grow( uint len ) {
+ assert(!is_top(), "cannot grow a top node's out array");
+ Arena* arena = Compile::current()->node_arena();
+ uint new_max = _outmax;
+ if( new_max == 0 ) {
+ _outmax = 4;
+ _out = (Node **)arena->Amalloc(4*sizeof(Node*));
+ return;
+ }
+ while( new_max <= len ) new_max <<= 1; // Find next power-of-2
+ // Trimming to limit allows a uint8 to handle up to 255 edges.
+ // Previously I was using only powers-of-2 which peaked at 128 edges.
+ //if( new_max >= limit ) new_max = limit-1;
+ assert(_out != NULL && _out != NO_OUT_ARRAY, "out must have sensible value");
+ _out = (Node**)arena->Arealloc(_out,_outmax*sizeof(Node*),new_max*sizeof(Node*));
+ //Copy::zero_to_bytes(&_out[_outmax], (new_max-_outmax)*sizeof(Node*)); // NULL all new space
+ _outmax = new_max; // Record new max length
+ // This assertion makes sure that Node::_max is wide enough to
+ // represent the numerical value of new_max.
+ assert(_outmax == new_max && _outmax > len, "int width of _outmax is too small");
+}
+
+#ifdef ASSERT
+//------------------------------is_dead----------------------------------------
+bool Node::is_dead() const {
+ // Mach and pinch point nodes may look dead.
+ if( is_top() || is_Mach() || (Opcode() == Op_Node && _outcnt > 0) )
+ return false;
+ for( uint i = 0; i < _max; i++ )
+ if( _in[i] != NULL )
+ return false;
+ dump();
+ return true;
+}
+#endif
+
+//------------------------------add_req----------------------------------------
+// Add a new required input at the end
+void Node::add_req( Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+
+ // Look to see if I can move precedence down one without reallocating
+ if( (_cnt >= _max) || (in(_max-1) != NULL) )
+ grow( _max+1 );
+
+ // Find a precedence edge to move
+ if( in(_cnt) != NULL ) { // Next precedence edge is busy?
+ uint i;
+ for( i=_cnt; i<_max; i++ )
+ if( in(i) == NULL ) // Find the NULL at end of prec edge list
+ break; // There must be one, since we grew the array
+ _in[i] = in(_cnt); // Move prec over, making space for req edge
+ }
+ _in[_cnt++] = n; // Stuff over old prec edge
+ if (n != NULL) n->add_out((Node *)this);
+}
+
+//---------------------------add_req_batch-------------------------------------
+// Add 'm' copies of a new required input at the end
+void Node::add_req_batch( Node *n, uint m ) {
+ assert( is_not_dead(n), "can not use dead node");
+ // check various edge cases
+ if ((int)m <= 1) {
+ assert((int)m >= 0, "oob");
+ if (m != 0) add_req(n);
+ return;
+ }
+
+ // Look to see if I can move precedence down one without reallocating
+ if( (_cnt+m) > _max || _in[_max-m] )
+ grow( _max+m );
+
+ // Find a precedence edge to move
+ if( _in[_cnt] != NULL ) { // Next precedence edge is busy?
+ uint i;
+ for( i=_cnt; i<_max; i++ )
+ if( _in[i] == NULL ) // Find the NULL at end of prec edge list
+ break; // There must be one, since we grew the array
+ // Slide all the precs over by m positions (assume #prec << m).
+ Copy::conjoint_words_to_higher((HeapWord*)&_in[_cnt], (HeapWord*)&_in[_cnt+m], ((i-_cnt)*sizeof(Node*)));
+ }
+
+ // Stuff over the old prec edges
+ for(uint i=0; i<m; i++ ) {
+ _in[_cnt++] = n;
+ }
+
+ // Insert multiple out edges on the node.
+ if (n != NULL && !n->is_top()) {
+ for(uint i=0; i<m; i++ ) {
+ n->add_out((Node *)this);
+ }
+ }
+}
+
+//------------------------------del_req----------------------------------------
+// Delete the required edge and compact the edge array
+void Node::del_req( uint idx ) {
+ // First remove corresponding def-use edge
+ Node *n = in(idx);
+ if (n != NULL) n->del_out((Node *)this);
+ _in[idx] = in(--_cnt); // Compact the array
+ _in[_cnt] = NULL; // NULL out emptied slot
+}
+
+//------------------------------ins_req----------------------------------------
+// Insert a new required input at the given index 'idx'
+void Node::ins_req( uint idx, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ add_req(NULL); // Make space
+ assert( idx < _max, "Must have allocated enough space");
+ // Slide over
+ if(_cnt-idx-1 > 0) {
+ Copy::conjoint_words_to_higher((HeapWord*)&_in[idx], (HeapWord*)&_in[idx+1], ((_cnt-idx-1)*sizeof(Node*)));
+ }
+ _in[idx] = n; // Stuff over old required edge
+ if (n != NULL) n->add_out((Node *)this); // Add reciprocal def-use edge
+}
+
+//-----------------------------find_edge---------------------------------------
+int Node::find_edge(Node* n) {
+ for (uint i = 0; i < len(); i++) {
+ if (_in[i] == n) return i;
+ }
+ return -1;
+}
+
+//----------------------------replace_edge-------------------------------------
+int Node::replace_edge(Node* old, Node* neww) {
+ if (old == neww) return 0; // nothing to do
+ uint nrep = 0;
+ for (uint i = 0; i < len(); i++) {
+ if (in(i) == old) {
+ if (i < req())
+ set_req(i, neww);
+ else
+ set_prec(i, neww);
+ nrep++;
+ }
+ }
+ return nrep;
+}
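+// Illustrative usage (a sketch, not a caller in this change): rewire every
+// occurrence of one def on a use, counting how many edges were switched:
+//   int nrep = use->replace_edge(old_def, new_def);
+//   // nrep == 0 means 'use' never had old_def as an input at all
+// Both required and precedence edges are handled, as the loop above shows.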
+
+//-------------------------disconnect_inputs-----------------------------------
+// NULL out all inputs to eliminate incoming Def-Use edges.
+// Return the number of edges between 'n' and 'this'
+int Node::disconnect_inputs(Node *n) {
+ int edges_to_n = 0;
+
+ uint cnt = req();
+ for( uint i = 0; i < cnt; ++i ) {
+ if( in(i) == 0 ) continue;
+ if( in(i) == n ) ++edges_to_n;
+ set_req(i, NULL);
+ }
+ // Remove precedence edges if any exist
+ // Note: Safepoints may have precedence edges, even during parsing
+ if( (req() != len()) && (in(req()) != NULL) ) {
+ uint max = len();
+ for( uint i = 0; i < max; ++i ) {
+ if( in(i) == 0 ) continue;
+ if( in(i) == n ) ++edges_to_n;
+ set_prec(i, NULL);
+ }
+ }
+
+ // Node::destruct requires all out edges be deleted first
+ // debug_only(destruct();) // no reuse benefit expected
+ return edges_to_n;
+}
+
+//-----------------------------uncast---------------------------------------
+// %%% Temporary, until we sort out CheckCastPP vs. CastPP.
+// Strip away casting. (It is depth-limited.)
+Node* Node::uncast() const {
+ // Should be inline:
+ //return is_ConstraintCast() ? uncast_helper(this) : (Node*) this;
+ if (is_ConstraintCast() ||
+ (is_Type() && req() == 2 && Opcode() == Op_CheckCastPP))
+ return uncast_helper(this);
+ else
+ return (Node*) this;
+}
+
+//---------------------------uncast_helper-------------------------------------
+Node* Node::uncast_helper(const Node* p) {
+ uint max_depth = 3;
+ for (uint i = 0; i < max_depth; i++) {
+ if (p == NULL || p->req() != 2) {
+ break;
+ } else if (p->is_ConstraintCast()) {
+ p = p->in(1);
+ } else if (p->is_Type() && p->Opcode() == Op_CheckCastPP) {
+ p = p->in(1);
+ } else {
+ break;
+ }
+ }
+ return (Node*) p;
+}
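+// Illustrative example (hypothetical chain): if p was built as
+//   CheckCastPP -> CastPP -> base
+// then p->uncast() walks through both casts and returns 'base'; a chain
+// deeper than max_depth (3) is only partially stripped.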
+
+//------------------------------add_prec---------------------------------------
+// Add a new precedence input. Precedence inputs are unordered, with
+// duplicates removed and NULLs packed down at the end.
+void Node::add_prec( Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+
+ // Check for NULL at end
+ if( _cnt >= _max || in(_max-1) )
+ grow( _max+1 );
+
+ // Find a precedence edge to move
+ uint i = _cnt;
+ while( in(i) != NULL ) i++;
+ _in[i] = n; // Stuff prec edge over NULL
+ if ( n != NULL) n->add_out((Node *)this); // Add mirror edge
+}
+
+//------------------------------rm_prec----------------------------------------
+// Remove a precedence input. Precedence inputs are unordered, with
+// duplicates removed and NULLs packed down at the end.
+void Node::rm_prec( uint j ) {
+
+ // Find end of precedence list to pack NULLs
+ uint i;
+ for( i=j; i<_max; i++ )
+ if( !_in[i] ) // Find the NULL at end of prec edge list
+ break;
+ if (_in[j] != NULL) _in[j]->del_out((Node *)this);
+ _in[j] = _in[--i]; // Move last element over removed guy
+ _in[i] = NULL; // NULL out last element
+}
+
+//------------------------------size_of----------------------------------------
+uint Node::size_of() const { return sizeof(*this); }
+
+//------------------------------ideal_reg--------------------------------------
+uint Node::ideal_reg() const { return 0; }
+
+//------------------------------jvms-------------------------------------------
+JVMState* Node::jvms() const { return NULL; }
+
+#ifdef ASSERT
+//------------------------------verify_jvms------------------------------------
+bool Node::verify_jvms(const JVMState* using_jvms) const {
+ for (JVMState* jvms = this->jvms(); jvms != NULL; jvms = jvms->caller()) {
+ if (jvms == using_jvms) return true;
+ }
+ return false;
+}
+
+//------------------------------init_NodeProperty------------------------------
+void Node::init_NodeProperty() {
+ assert(_max_classes <= max_jushort, "too many NodeProperty classes");
+ assert(_max_flags <= max_jushort, "too many NodeProperty flags");
+}
+#endif
+
+//------------------------------format-----------------------------------------
+// Print as assembly
+void Node::format( PhaseRegAlloc *, outputStream *st ) const {}
+//------------------------------emit-------------------------------------------
+// Emit bytes starting at parameter 'ptr'.
+void Node::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {}
+//------------------------------size-------------------------------------------
+// Size of instruction in bytes
+uint Node::size(PhaseRegAlloc *ra_) const { return 0; }
+
+//------------------------------CFG Construction-------------------------------
+// Nodes that end basic blocks, e.g. IfTrue/IfFalse, JumpProjNode, Root,
+// Goto and Return.
+const Node *Node::is_block_proj() const { return 0; }
+
+// Minimum guaranteed type
+const Type *Node::bottom_type() const { return Type::BOTTOM; }
+
+
+//------------------------------raise_bottom_type------------------------------
+// Refine the recorded worst-case Type output for this Node.
+void Node::raise_bottom_type(const Type* new_type) {
+ if (is_Type()) {
+ TypeNode *n = this->as_Type();
+ if (VerifyAliases) {
+ assert(new_type->higher_equal(n->type()), "new type must refine old type");
+ }
+ n->set_type(new_type);
+ } else if (is_Load()) {
+ LoadNode *n = this->as_Load();
+ if (VerifyAliases) {
+ assert(new_type->higher_equal(n->type()), "new type must refine old type");
+ }
+ n->set_type(new_type);
+ }
+}
+
+//------------------------------Identity---------------------------------------
+// Return a node that the given node is equivalent to.
+Node *Node::Identity( PhaseTransform * ) {
+ return this; // Default to no identities
+}
+
+//------------------------------Value------------------------------------------
+// Compute a new Type for a node using the Type of the inputs.
+const Type *Node::Value( PhaseTransform * ) const {
+ return bottom_type(); // Default to worst-case Type
+}
+
+//------------------------------Ideal------------------------------------------
+//
+// 'Idealize' the graph rooted at this Node.
+//
+// In order to be efficient and flexible there are some subtle invariants
+// these Ideal calls need to hold. Running with '+VerifyIterativeGVN' checks
+// these invariants, although it's too slow to have on by default. If you are
+// hacking an Ideal call, be sure to test with +VerifyIterativeGVN!
+//
+// The Ideal call may almost arbitrarily reshape the graph rooted at the 'this'
+// pointer. If ANY change is made, it must return the root of the reshaped
+// graph - even if the root is the same Node. Example: swapping the inputs
+// to an AddINode gives the same answer and same root, but you still have to
+// return the 'this' pointer instead of NULL.
+//
+// You cannot return an OLD Node, except for the 'this' pointer. Use the
+// Identity call to return an old Node; basically, if Identity can find
+// another Node, have the Ideal call make no change and return NULL.
+// Example: AddINode::Ideal must check for add of zero; in this case it
+// returns NULL instead of doing any graph reshaping.
+//
+// You cannot modify any old Nodes except for the 'this' pointer. Due to
+// sharing there may be other users of the old Nodes relying on their current
+// semantics. Modifying them will break the other users.
+// Example: when reshape "(X+3)+4" into "X+7" you must leave the Node for
+// "X+3" unchanged in case it is shared.
+//
+// If you modify the 'this' pointer's inputs, you must use 'set_req' with
+// def-use info. If you are making a new Node (either as the new root or
+// some new internal piece) you must NOT use set_req with def-use info.
+// You can make a new Node with either 'new' or 'clone'. In either case,
+// def-use info is (correctly) not generated.
+// Example: reshape "(X+3)+4" into "X+7":
+// set_req(1,in(1)->in(1) /* grab X */, du /* must use DU on 'this' */);
+// set_req(2,phase->intcon(7),du);
+// return this;
+// Example: reshape "X*4" into "X<<1"
+// return new (C,3) LShiftINode( in(1), phase->intcon(1) );
+//
+// You must call 'phase->transform(X)' on any new Nodes X you make, except
+// for the returned root node. Example: reshape "X*31" into "(X<<5)-X".
+// Node *shift=phase->transform(new(C,3)LShiftINode(in(1),phase->intcon(5)));
+// return new (C,3) SubINode(shift, in(1));
+//
+// When making a Node for a constant use 'phase->makecon' or 'phase->intcon'.
+// These forms are faster than 'phase->transform(new (C,1) ConNode())' and Do
+// The Right Thing with def-use info.
+//
+// You cannot bury the 'this' Node inside of a graph reshape. If the reshaped
+// graph uses the 'this' Node it must be the root. If you want a Node with
+// the same Opcode as the 'this' pointer use 'clone'.
+//
+Node *Node::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return NULL; // Default to being Ideal already
+}
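+// A minimal sketch (not from the original sources) of an Ideal method that
+// follows the contract spelled out above.  "FooNode" and the particular
+// reshaping are hypothetical; only the calling conventions are the documented ones.
+//   Node *FooNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+//     if (in(2)->find_int_type() == NULL)
+//       return NULL;                 // no change: return NULL, never an old Node
+//     // New interior Nodes must be transformed before use; def-use info is
+//     // maintained only on 'this', via set_req.
+//     Node *shift = phase->transform(new (phase->C, 3) LShiftINode(in(1), phase->intcon(1)));
+//     set_req(1, shift);
+//     return this;                   // any change must return the (possibly same) root
+//   }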
+
+// Some nodes have specific Ideal subgraph transformations only if they are
+// unique users of specific nodes. Such nodes should be put on the IGVN worklist
+// for the transformations to happen.
+bool Node::has_special_unique_user() const {
+ assert(outcnt() == 1, "match only for unique out");
+ Node* n = unique_out();
+ int op = Opcode();
+ if( this->is_Store() ) {
+ // Condition for back-to-back stores folding.
+ return n->Opcode() == op && n->in(MemNode::Memory) == this;
+ } else if( op == Op_AddL ) {
+ // Condition for convL2I(addL(x,y)) ==> addI(convL2I(x),convL2I(y))
+ return n->Opcode() == Op_ConvL2I && n->in(1) == this;
+ } else if( op == Op_SubI || op == Op_SubL ) {
+ // Condition for subI(x,subI(y,z)) ==> subI(addI(x,z),y)
+ return n->Opcode() == op && n->in(2) == this;
+ }
+ return false;
+};
+
+//------------------------------remove_dead_region-----------------------------
+// This control node is dead. Follow the subgraph below it making everything
+// using it dead as well. This will happen normally via the usual IterGVN
+// worklist but this call is more efficient. Do not update use-def info
+// inside the dead region, just at the borders.
+static bool kill_dead_code( Node *dead, PhaseIterGVN *igvn ) {
+ // Con's are a popular node to re-hit in the hash table again.
+ if( dead->is_Con() ) return false;
+
+ // Can't put ResourceMark here since igvn->_worklist uses the same arena
+ // for verify pass with +VerifyOpto and we add/remove elements in it here.
+ Node_List nstack(Thread::current()->resource_area());
+
+ Node *top = igvn->C->top();
+ bool progress = false;
+ nstack.push(dead);
+
+ while (nstack.size() > 0) {
+ dead = nstack.pop();
+ if (dead->outcnt() > 0) {
+ // Keep dead node on stack until all uses are processed.
+ nstack.push(dead);
+ // For all Users of the Dead... ;-)
+ for (DUIterator_Last kmin, k = dead->last_outs(kmin); k >= kmin; ) {
+ Node* use = dead->last_out(k);
+ igvn->hash_delete(use); // Yank from hash table prior to mod
+ if (use->in(0) == dead) { // Found another dead node
+ assert (!use->is_Con(), "Control for Con node should be Root node.");
+ use->set_req(0, top); // Cut dead edge to prevent processing
+ nstack.push(use); // the dead node again.
+ } else { // Else found a not-dead user
+ for (uint j = 1; j < use->req(); j++) {
+ if (use->in(j) == dead) { // Turn all dead inputs into TOP
+ use->set_req(j, top);
+ }
+ }
+ igvn->_worklist.push(use);
+ }
+ // Refresh the iterator, since any number of kills might have happened.
+ k = dead->last_outs(kmin);
+ }
+ } else { // (dead->outcnt() == 0)
+ // Done with outputs.
+ igvn->hash_delete(dead);
+ igvn->_worklist.remove(dead);
+ igvn->set_type(dead, Type::TOP);
+ if (dead->is_macro()) {
+ igvn->C->remove_macro_node(dead);
+ }
+ // Kill all inputs to the dead guy
+ for (uint i=0; i < dead->req(); i++) {
+ Node *n = dead->in(i); // Get input to dead guy
+ if (n != NULL && !n->is_top()) { // Input is valid?
+ progress = true;
+ dead->set_req(i, top); // Smash input away
+ if (n->outcnt() == 0) { // Input also goes dead?
+ if (!n->is_Con())
+ nstack.push(n); // Clear it out as well
+ } else if (n->outcnt() == 1 &&
+ n->has_special_unique_user()) {
+ igvn->add_users_to_worklist( n );
+ } else if (n->outcnt() <= 2 && n->is_Store()) {
+ // Push store's uses on worklist to enable folding optimization for
+ // store/store and store/load to the same address.
+ // The restriction (outcnt() <= 2) is the same as in set_req_X()
+ // and remove_globally_dead_node().
+ igvn->add_users_to_worklist( n );
+ }
+ }
+ }
+ } // (dead->outcnt() == 0)
+ } // while (nstack.size() > 0) for outputs
+ return progress;
+}
+
+//------------------------------remove_dead_region-----------------------------
+bool Node::remove_dead_region(PhaseGVN *phase, bool can_reshape) {
+ Node *n = in(0);
+ if( !n ) return false;
+ // Lost control into this guy? I.e., it became unreachable?
+ // Aggressively kill all unreachable code.
+ if (can_reshape && n->is_top()) {
+ return kill_dead_code(this, phase->is_IterGVN());
+ }
+
+ if( n->is_Region() && n->as_Region()->is_copy() ) {
+ Node *m = n->nonnull_req();
+ set_req(0, m);
+ return true;
+ }
+ return false;
+}
+
+//------------------------------Ideal_DU_postCCP-------------------------------
+// Idealize graph, using DU info. Must clone result into new-space
+Node *Node::Ideal_DU_postCCP( PhaseCCP * ) {
+ return NULL; // Default to no change
+}
+
+//------------------------------hash-------------------------------------------
+// Hash function over Nodes.
+uint Node::hash() const {
+ uint sum = 0;
+ for( uint i=0; i<_cnt; i++ ) // Add in all inputs
+ sum = (sum<<1)-(uintptr_t)in(i); // Ignore embedded NULLs
+ return (sum>>2) + _cnt + Opcode();
+}
+
+//------------------------------cmp--------------------------------------------
+// Compare special parts of simple Nodes
+uint Node::cmp( const Node &n ) const {
+ return 1; // Must be same
+}
+
+//------------------------------rematerialize-----------------------------------
+// Should we clone rather than spill this instruction?
+bool Node::rematerialize() const {
+ if ( is_Mach() )
+ return this->as_Mach()->rematerialize();
+ else
+ return (_flags & Flag_rematerialize) != 0;
+}
+
+//------------------------------needs_anti_dependence_check---------------------
+// Nodes which use memory without consuming it, hence need antidependences.
+bool Node::needs_anti_dependence_check() const {
+ if( req() < 2 || (_flags & Flag_needs_anti_dependence_check) == 0 )
+ return false;
+ else
+ return in(1)->bottom_type()->has_memory();
+}
+
+
+// Get an integer constant from a ConNode (or CastIINode).
+// Return a default value if there is no apparent constant here.
+const TypeInt* Node::find_int_type() const {
+ if (this->is_Type()) {
+ return this->as_Type()->type()->isa_int();
+ } else if (this->is_Con()) {
+ assert(is_Mach(), "should be ConNode(TypeNode) or else a MachNode");
+ return this->bottom_type()->isa_int();
+ }
+ return NULL;
+}
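+// Illustrative usage (a sketch): probe an input for a known integer constant
+// before deciding to fold on it:
+//   const TypeInt *ti = n->in(2)->find_int_type();
+//   if (ti != NULL && ti->is_con()) {
+//     jint con = ti->get_con();   // safe: the input is a compile-time constant
+//   }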
+
+// Get a pointer constant from a ConstNode.
+// Returns the constant if it is a pointer ConstNode
+intptr_t Node::get_ptr() const {
+ assert( Opcode() == Op_ConP, "" );
+ return ((ConPNode*)this)->type()->is_ptr()->get_con();
+}
+
+// Get a long constant from a ConNode.
+// Return a default value if there is no apparent constant here.
+const TypeLong* Node::find_long_type() const {
+ if (this->is_Type()) {
+ return this->as_Type()->type()->isa_long();
+ } else if (this->is_Con()) {
+ assert(is_Mach(), "should be ConNode(TypeNode) or else a MachNode");
+ return this->bottom_type()->isa_long();
+ }
+ return NULL;
+}
+
+// Get a double constant from a ConstNode.
+// Returns the constant if it is a double ConstNode
+jdouble Node::getd() const {
+ assert( Opcode() == Op_ConD, "" );
+ return ((ConDNode*)this)->type()->is_double_constant()->getd();
+}
+
+// Get a float constant from a ConstNode.
+// Returns the constant if it is a float ConstNode
+jfloat Node::getf() const {
+ assert( Opcode() == Op_ConF, "" );
+ return ((ConFNode*)this)->type()->is_float_constant()->getf();
+}
+
+#ifndef PRODUCT
+
+//----------------------------NotANode----------------------------------------
+// Used in debugging code to avoid walking across dead or uninitialized edges.
+static inline bool NotANode(const Node* n) {
+ if (n == NULL) return true;
+ if (((intptr_t)n & 1) != 0) return true; // uninitialized, etc.
+ if (*(address*)n == badAddress) return true; // killed by Node::destruct
+ return false;
+}
+
+
+//------------------------------find------------------------------------------
+// Find a neighbor of this Node with the given _idx
+// If idx is negative, find its absolute value, following both _in and _out.
+static void find_recur( Node* &result, Node *n, int idx, bool only_ctrl,
+ VectorSet &old_space, VectorSet &new_space ) {
+ int node_idx = (idx >= 0) ? idx : -idx;
+ if (NotANode(n)) return; // Gracefully handle NULL, -1, 0xabababab, etc.
+ // Contained in new_space or old_space?
+ VectorSet *v = Compile::current()->node_arena()->contains(n) ? &new_space : &old_space;
+ if( v->test(n->_idx) ) return;
+ if( (int)n->_idx == node_idx
+ debug_only(|| n->debug_idx() == node_idx) ) {
+ if (result != NULL)
+ tty->print("find: " INTPTR_FORMAT " and " INTPTR_FORMAT " both have idx==%d\n",
+ (uintptr_t)result, (uintptr_t)n, node_idx);
+ result = n;
+ }
+ v->set(n->_idx);
+ for( uint i=0; i<n->len(); i++ ) {
+ if( only_ctrl && !(n->is_Region()) && (n->Opcode() != Op_Root) && (i != TypeFunc::Control) ) continue;
+ find_recur( result, n->in(i), idx, only_ctrl, old_space, new_space );
+ }
+ // Search along forward edges also:
+ if (idx < 0 && !only_ctrl) {
+ for( uint j=0; j<n->outcnt(); j++ ) {
+ find_recur( result, n->raw_out(j), idx, only_ctrl, old_space, new_space );
+ }
+ }
+#ifdef ASSERT
+ // Search along debug_orig edges last:
+ for (Node* orig = n->debug_orig(); orig != NULL; orig = orig->debug_orig()) {
+ if (NotANode(orig)) break;
+ find_recur( result, orig, idx, only_ctrl, old_space, new_space );
+ }
+#endif //ASSERT
+}
+
+// call this from debugger:
+Node* find_node(Node* n, int idx) {
+ return n->find(idx);
+}
+
+//------------------------------find-------------------------------------------
+Node* Node::find(int idx) const {
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ Node* result = NULL;
+ find_recur( result, (Node*) this, idx, false, old_space, new_space );
+ return result;
+}
+
+//------------------------------find_ctrl--------------------------------------
+// Find an ancestor to this node in the control history with given _idx
+Node* Node::find_ctrl(int idx) const {
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ Node* result = NULL;
+ find_recur( result, (Node*) this, idx, true, old_space, new_space );
+ return result;
+}
+#endif
+
+
+
+#ifndef PRODUCT
+int Node::_in_dump_cnt = 0;
+
+// -----------------------------Name-------------------------------------------
+extern const char *NodeClassNames[];
+const char *Node::Name() const { return NodeClassNames[Opcode()]; }
+
+static bool is_disconnected(const Node* n) {
+ for (uint i = 0; i < n->req(); i++) {
+ if (n->in(i) != NULL) return false;
+ }
+ return true;
+}
+
+#ifdef ASSERT
+static void dump_orig(Node* orig) {
+ Compile* C = Compile::current();
+ if (NotANode(orig)) orig = NULL;
+ if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+ if (orig == NULL) return;
+ tty->print(" !orig=");
+ Node* fast = orig->debug_orig(); // tortoise & hare algorithm to detect loops
+ if (NotANode(fast)) fast = NULL;
+ while (orig != NULL) {
+ bool discon = is_disconnected(orig); // if discon, print [123] else 123
+ if (discon) tty->print("[");
+ if (!Compile::current()->node_arena()->contains(orig))
+ tty->print("o");
+ tty->print("%d", orig->_idx);
+ if (discon) tty->print("]");
+ orig = orig->debug_orig();
+ if (NotANode(orig)) orig = NULL;
+ if (orig != NULL && !C->node_arena()->contains(orig)) orig = NULL;
+ if (orig != NULL) tty->print(",");
+ if (fast != NULL) {
+ // Step fast twice for each single step of orig:
+ fast = fast->debug_orig();
+ if (NotANode(fast)) fast = NULL;
+ if (fast != NULL && fast != orig) {
+ fast = fast->debug_orig();
+ if (NotANode(fast)) fast = NULL;
+ }
+ if (fast == orig) {
+ tty->print("...");
+ break;
+ }
+ }
+ }
+}
+
+void Node::set_debug_orig(Node* orig) {
+ _debug_orig = orig;
+ if (BreakAtNode == 0) return;
+ if (NotANode(orig)) orig = NULL;
+ int trip = 10;
+ while (orig != NULL) {
+ if (orig->debug_idx() == BreakAtNode || (int)orig->_idx == BreakAtNode) {
+ tty->print_cr("BreakAtNode: _idx=%d _debug_idx=%d orig._idx=%d orig._debug_idx=%d",
+ this->_idx, this->debug_idx(), orig->_idx, orig->debug_idx());
+ BREAKPOINT;
+ }
+ orig = orig->debug_orig();
+ if (NotANode(orig)) orig = NULL;
+ if (trip-- <= 0) break;
+ }
+}
+#endif //ASSERT
+
+//------------------------------dump------------------------------------------
+// Dump a Node
+void Node::dump() const {
+ Compile* C = Compile::current();
+ bool is_new = C->node_arena()->contains(this);
+ _in_dump_cnt++;
+ tty->print("%c%d\t%s\t=== ",
+ is_new ? ' ' : 'o', _idx, Name());
+
+ // Dump the required and precedence inputs
+ dump_req();
+ dump_prec();
+ // Dump the outputs
+ dump_out();
+
+ if (is_disconnected(this)) {
+#ifdef ASSERT
+ tty->print(" [%d]",debug_idx());
+ dump_orig(debug_orig());
+#endif
+ tty->cr();
+ _in_dump_cnt--;
+ return; // don't process dead nodes
+ }
+
+ // Dump node-specific info
+ dump_spec(tty);
+#ifdef ASSERT
+ // Dump the non-reset _debug_idx
+ if( Verbose && WizardMode ) {
+ tty->print(" [%d]",debug_idx());
+ }
+#endif
+
+ const Type *t = bottom_type();
+
+ if (t != NULL && (t->isa_instptr() || t->isa_klassptr())) {
+ const TypeInstPtr *toop = t->isa_instptr();
+ const TypeKlassPtr *tkls = t->isa_klassptr();
+ ciKlass* klass = toop ? toop->klass() : (tkls ? tkls->klass() : NULL );
+ if( klass && klass->is_loaded() && klass->is_interface() ) {
+ tty->print(" Interface:");
+ } else if( toop ) {
+ tty->print(" Oop:");
+ } else if( tkls ) {
+ tty->print(" Klass:");
+ }
+ t->dump();
+ } else if( t == Type::MEMORY ) {
+ tty->print(" Memory:");
+ MemNode::dump_adr_type(this, adr_type(), tty);
+ } else if( Verbose || WizardMode ) {
+ tty->print(" Type:");
+ if( t ) {
+ t->dump();
+ } else {
+ tty->print("no type");
+ }
+ }
+ if (is_new) {
+ debug_only(dump_orig(debug_orig()));
+ Node_Notes* nn = C->node_notes_at(_idx);
+ if (nn != NULL && !nn->is_clear()) {
+ if (nn->jvms() != NULL) {
+ tty->print(" !jvms:");
+ nn->jvms()->dump_spec(tty);
+ }
+ }
+ }
+ tty->cr();
+ _in_dump_cnt--;
+}
+
+//------------------------------dump_req--------------------------------------
+void Node::dump_req() const {
+ // Dump the required input edges
+ for (uint i = 0; i < req(); i++) { // For all required inputs
+ Node* d = in(i);
+ if (d == NULL) {
+ tty->print("_ ");
+ } else if (NotANode(d)) {
+ tty->print("NotANode "); // uninitialized, sentinel, garbage, etc.
+ } else {
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(d) ? ' ' : 'o', d->_idx);
+ }
+ }
+}
+
+
+//------------------------------dump_prec-------------------------------------
+void Node::dump_prec() const {
+ // Dump the precedence edges
+ int any_prec = 0;
+ for (uint i = req(); i < len(); i++) { // For all precedence inputs
+ Node* p = in(i);
+ if (p != NULL) {
+ if( !any_prec++ ) tty->print(" |");
+ if (NotANode(p)) { tty->print("NotANode "); continue; }
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(in(i)) ? ' ' : 'o', in(i)->_idx);
+ }
+ }
+}
+
+//------------------------------dump_out--------------------------------------
+void Node::dump_out() const {
+ // Delimit the output edges
+ tty->print(" [[");
+ // Dump the output edges
+ for (uint i = 0; i < _outcnt; i++) { // For all outputs
+ Node* u = _out[i];
+ if (u == NULL) {
+ tty->print("_ ");
+ } else if (NotANode(u)) {
+ tty->print("NotANode ");
+ } else {
+ tty->print("%c%d ", Compile::current()->node_arena()->contains(u) ? ' ' : 'o', u->_idx);
+ }
+ }
+ tty->print("]] ");
+}
+
+//------------------------------dump_nodes-------------------------------------
+
+// Helper class for dump_nodes. Wraps an old and new VectorSet.
+class OldNewVectorSet : public StackObj {
+ Arena* _node_arena;
+ VectorSet _old_vset, _new_vset;
+ VectorSet* select(Node* n) {
+ return _node_arena->contains(n) ? &_new_vset : &_old_vset;
+ }
+ public:
+ OldNewVectorSet(Arena* node_arena, ResourceArea* area) :
+ _node_arena(node_arena),
+ _old_vset(area), _new_vset(area) {}
+
+ void set(Node* n) { select(n)->set(n->_idx); }
+ bool test_set(Node* n) { return select(n)->test_set(n->_idx) != 0; }
+ bool test(Node* n) { return select(n)->test(n->_idx) != 0; }
+ void del(Node* n) { (*select(n)) >>= n->_idx; }
+};
+
+
+static void dump_nodes(const Node* start, int d, bool only_ctrl) {
+ Node* s = (Node*)start; // remove const
+ if (NotANode(s)) return;
+
+ Compile* C = Compile::current();
+ ResourceArea *area = Thread::current()->resource_area();
+ Node_Stack stack(area, MIN2((uint)ABS(d), C->unique() >> 1));
+ OldNewVectorSet visited(C->node_arena(), area);
+ OldNewVectorSet on_stack(C->node_arena(), area);
+
+ visited.set(s);
+ on_stack.set(s);
+ stack.push(s, 0);
+ if (d < 0) s->dump();
+
+ // Do a depth first walk over edges
+ while (stack.is_nonempty()) {
+ Node* tp = stack.node();
+ uint idx = stack.index();
+ uint limit = d > 0 ? tp->len() : tp->outcnt();
+ if (idx >= limit) {
+ // no more arcs to visit
+ if (d > 0) tp->dump();
+ on_stack.del(tp);
+ stack.pop();
+ } else {
+ // process the "idx"th arc
+ stack.set_index(idx + 1);
+ Node* n = d > 0 ? tp->in(idx) : tp->raw_out(idx);
+
+ if (NotANode(n)) continue;
+ // do not recurse through top or the root (would reach unrelated stuff)
+ if (n->is_Root() || n->is_top()) continue;
+ if (only_ctrl && !n->is_CFG()) continue;
+
+ if (!visited.test_set(n)) { // forward arc
+ // Limit depth
+ if (stack.size() < (uint)ABS(d)) {
+ if (d < 0) n->dump();
+ stack.push(n, 0);
+ on_stack.set(n);
+ }
+ } else { // back or cross arc
+ if (on_stack.test(n)) { // back arc
+ // print loop if there are no phis or regions in the mix
+ bool found_loop_breaker = false;
+ int k;
+ for (k = stack.size() - 1; k >= 0; k--) {
+ Node* m = stack.node_at(k);
+ if (m->is_Phi() || m->is_Region() || m->is_Root() || m->is_Start()) {
+ found_loop_breaker = true;
+ break;
+ }
+ if (m == n) // Found loop head
+ break;
+ }
+ assert(k >= 0, "n must be on stack");
+
+ if (!found_loop_breaker) {
+ tty->print("# %s LOOP FOUND:", only_ctrl ? "CONTROL" : "DATA");
+ for (int i = stack.size() - 1; i >= k; i--) {
+ Node* m = stack.node_at(i);
+ bool mnew = C->node_arena()->contains(m);
+ tty->print(" %s%d:%s", (mnew? "": "o"), m->_idx, m->Name());
+ if (i != 0) tty->print(d > 0? " <-": " ->");
+ }
+ tty->cr();
+ }
+ }
+ }
+ }
+ }
+}
+
+//------------------------------dump-------------------------------------------
+void Node::dump(int d) const {
+ dump_nodes(this, d, false);
+}
+
+//------------------------------dump_ctrl--------------------------------------
+// Dump a Node's control history to depth
+void Node::dump_ctrl(int d) const {
+ dump_nodes(this, d, true);
+}
+
+// VERIFICATION CODE
+// For each input edge to a node (ie - for each Use-Def edge), verify that
+// there is a corresponding Def-Use edge.
+//------------------------------verify_edges-----------------------------------
+void Node::verify_edges(Unique_Node_List &visited) {
+ uint i, j, idx;
+ int cnt;
+ Node *n;
+
+ // Recursive termination test
+ if (visited.member(this)) return;
+ visited.push(this);
+
+ // Walk over all input edges, checking for correspondence
+ for( i = 0; i < len(); i++ ) {
+ n = in(i);
+ if (n != NULL && !n->is_top()) {
+ // Count instances of (Node *)this
+ cnt = 0;
+ for (idx = 0; idx < n->_outcnt; idx++ ) {
+ if (n->_out[idx] == (Node *)this) cnt++;
+ }
+ assert( cnt > 0,"Failed to find Def-Use edge." );
+ // Check for duplicate edges
+ // walk the input array downcounting the input edges to n
+ for( j = 0; j < len(); j++ ) {
+ if( in(j) == n ) cnt--;
+ }
+ assert( cnt == 0,"Mismatched edge count.");
+ } else if (n == NULL) {
+ assert(i >= req() || i == 0 || is_Region() || is_Phi(), "only regions or phis have null data edges");
+ } else {
+ assert(n->is_top(), "sanity");
+ // Nothing to check.
+ }
+ }
+ // Recursive walk over all input edges
+ for( i = 0; i < len(); i++ ) {
+ n = in(i);
+ if( n != NULL )
+ in(i)->verify_edges(visited);
+ }
+}
+
+//------------------------------verify_recur-----------------------------------
+static const Node *unique_top = NULL;
+
+void Node::verify_recur(const Node *n, int verify_depth,
+ VectorSet &old_space, VectorSet &new_space) {
+ if ( verify_depth == 0 ) return;
+ if (verify_depth > 0) --verify_depth;
+
+ Compile* C = Compile::current();
+
+ // Contained in new_space or old_space?
+ VectorSet *v = C->node_arena()->contains(n) ? &new_space : &old_space;
+ // Check for visited in the proper space. Numberings are not unique
+ // across spaces, so we need a separate VectorSet for each space.
+ if( v->test_set(n->_idx) ) return;
+
+ if (n->is_Con() && n->bottom_type() == Type::TOP) {
+ if (C->cached_top_node() == NULL)
+ C->set_cached_top_node((Node*)n);
+ assert(C->cached_top_node() == n, "TOP node must be unique");
+ }
+
+ for( uint i = 0; i < n->len(); i++ ) {
+ Node *x = n->in(i);
+ if (!x || x->is_top()) continue;
+
+ // Verify my input has a def-use edge to me
+ if (true /*VerifyDefUse*/) {
+ // Count use-def edges from n to x
+ int cnt = 0;
+ for( uint j = 0; j < n->len(); j++ )
+ if( n->in(j) == x )
+ cnt++;
+ // Count def-use edges from x to n
+ uint max = x->_outcnt;
+ for( uint k = 0; k < max; k++ )
+ if (x->_out[k] == n)
+ cnt--;
+ assert( cnt == 0, "mismatched def-use edge counts" );
+ }
+
+ verify_recur(x, verify_depth, old_space, new_space);
+ }
+
+}
+
+//------------------------------verify-----------------------------------------
+// Check Def-Use info for my subgraph
+void Node::verify() const {
+ Compile* C = Compile::current();
+ Node* old_top = C->cached_top_node();
+ ResourceMark rm;
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ verify_recur(this, -1, old_space, new_space);
+ C->set_cached_top_node(old_top);
+}
+#endif
+
+
+//------------------------------walk-------------------------------------------
+// Graph walk, with both pre-order and post-order functions
+void Node::walk(NFunc pre, NFunc post, void *env) {
+ VectorSet visited(Thread::current()->resource_area()); // Setup for local walk
+ walk_(pre, post, env, visited);
+}
+
+void Node::walk_(NFunc pre, NFunc post, void *env, VectorSet &visited) {
+ if( visited.test_set(_idx) ) return;
+ pre(*this,env); // Call the pre-order walk function
+ for( uint i=0; i<_max; i++ )
+ if( in(i) ) // Input exists and is not walked?
+ in(i)->walk_(pre,post,env,visited); // Walk it with pre & post functions
+ post(*this,env); // Call the post-order walk function
+}
+
+void Node::nop(Node &, void*) {}
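+// A minimal illustrative sketch (not part of the original source) of driving
+// the walk: pass a counting pre-function matching NFunc and use Node::nop as
+// the post-function.
+//   static void count_node(Node&, void* env) { ++*(uint*)env; }
+//   uint cnt = 0;
+//   root->walk(count_node, Node::nop, &cnt);  // counts nodes reachable via inputs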
+
+//------------------------------Registers--------------------------------------
+// Do we Match on this edge index or not? Generally false for Control
+// and true for everything else. Weird for calls & returns.
+uint Node::match_edge(uint idx) const {
+ return idx; // True for other than index 0 (control)
+}
+
+// Register classes are defined for specific machines
+const RegMask &Node::out_RegMask() const {
+ ShouldNotCallThis();
+ return *(new RegMask());
+}
+
+const RegMask &Node::in_RegMask(uint) const {
+ ShouldNotCallThis();
+ return *(new RegMask());
+}
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+void Node_Array::reset( Arena *new_arena ) {
+ _a->Afree(_nodes,_max*sizeof(Node*));
+ _max = 0;
+ _nodes = NULL;
+ _a = new_arena;
+}
+
+//------------------------------clear------------------------------------------
+// Clear all entries in _nodes to NULL but keep storage
+void Node_Array::clear() {
+ Copy::zero_to_bytes( _nodes, _max*sizeof(Node*) );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::grow( uint i ) {
+ if( !_max ) {
+ _max = 1;
+ _nodes = (Node**)_a->Amalloc( _max * sizeof(Node*) );
+ _nodes[0] = NULL;
+ }
+ uint old = _max;
+ while( i >= _max ) _max <<= 1; // Double to fit
+ _nodes = (Node**)_a->Arealloc( _nodes, old*sizeof(Node*),_max*sizeof(Node*));
+ Copy::zero_to_bytes( &_nodes[old], (_max-old)*sizeof(Node*) );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::insert( uint i, Node *n ) {
+ if( _nodes[_max-1] ) grow(_max); // Get more space if full
+ Copy::conjoint_words_to_higher((HeapWord*)&_nodes[i], (HeapWord*)&_nodes[i+1], ((_max-i-1)*sizeof(Node*)));
+ _nodes[i] = n;
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::remove( uint i ) {
+ Copy::conjoint_words_to_lower((HeapWord*)&_nodes[i+1], (HeapWord*)&_nodes[i], ((_max-i-1)*sizeof(Node*)));
+ _nodes[_max-1] = NULL;
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::sort( C_sort_func_t func) {
+ qsort( _nodes, _max, sizeof( Node* ), func );
+}
+
+//-----------------------------------------------------------------------------
+void Node_Array::dump() const {
+#ifndef PRODUCT
+ for( uint i = 0; i < _max; i++ ) {
+ Node *nn = _nodes[i];
+ if( nn != NULL ) {
+ tty->print("%5d--> ",i); nn->dump();
+ }
+ }
+#endif
+}
+
+//--------------------------is_iteratively_computed------------------------------
+// Operation appears to be iteratively computed (such as an induction variable)
+// It is possible for this operation to return false for a loop-varying
+// value, if it appears (by local graph inspection) to be computed by a simple conditional.
+bool Node::is_iteratively_computed() {
+ if (ideal_reg()) { // does operation have a result register?
+ for (uint i = 1; i < req(); i++) {
+ Node* n = in(i);
+ if (n != NULL && n->is_Phi()) {
+ for (uint j = 1; j < n->req(); j++) {
+ if (n->in(j) == this) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//--------------------------find_similar------------------------------
+// Return a node with opcode "opc" and same inputs as "this" if one can
+// be found; otherwise return NULL.
+Node* Node::find_similar(int opc) {
+ if (req() >= 2) {
+ Node* def = in(1);
+ if (def && def->outcnt() >= 2) {
+ for (DUIterator_Fast dmax, i = def->fast_outs(dmax); i < dmax; i++) {
+ Node* use = def->fast_out(i);
+ if (use->Opcode() == opc &&
+ use->req() == req()) {
+ uint j;
+ for (j = 0; j < use->req(); j++) {
+ if (use->in(j) != in(j)) {
+ break;
+ }
+ }
+ if (j == use->req()) {
+ return use;
+ }
+ }
+ }
+ }
+ }
+ return NULL;
+}
+
+
+//--------------------------unique_ctrl_out------------------------------
+// Return the unique control out if only one. Null if none or more than one.
+Node* Node::unique_ctrl_out() {
+ Node* found = NULL;
+ for (uint i = 0; i < outcnt(); i++) {
+ Node* use = raw_out(i);
+ if (use->is_CFG() && use != this) {
+ if (found != NULL) return NULL;
+ found = use;
+ }
+ }
+ return found;
+}
+
+//=============================================================================
+//------------------------------yank-------------------------------------------
+// Find and remove
+void Node_List::yank( Node *n ) {
+ uint i;
+ for( i = 0; i < _cnt; i++ )
+ if( _nodes[i] == n )
+ break;
+
+ if( i < _cnt )
+ _nodes[i] = _nodes[--_cnt];
+}
+
+//------------------------------dump-------------------------------------------
+void Node_List::dump() const {
+#ifndef PRODUCT
+ for( uint i = 0; i < _cnt; i++ )
+ if( _nodes[i] ) {
+ tty->print("%5d--> ",i);
+ _nodes[i]->dump();
+ }
+#endif
+}
+
+//=============================================================================
+//------------------------------remove-----------------------------------------
+void Unique_Node_List::remove( Node *n ) {
+ if( _in_worklist[n->_idx] ) {
+ for( uint i = 0; i < size(); i++ )
+ if( _nodes[i] == n ) {
+ map(i,Node_List::pop());
+ _in_worklist >>= n->_idx;
+ return;
+ }
+ ShouldNotReachHere();
+ }
+}
+
+//-----------------------remove_useless_nodes----------------------------------
+// Remove useless nodes from worklist
+void Unique_Node_List::remove_useless_nodes(VectorSet &useful) {
+
+ for( uint i = 0; i < size(); ++i ) {
+ Node *n = at(i);
+ assert( n != NULL, "Did not expect null entries in worklist");
+ if( ! useful.test(n->_idx) ) {
+ _in_worklist >>= n->_idx;
+ map(i,Node_List::pop());
+ // Node *replacement = Node_List::pop();
+ // if( i != size() ) { // Check if removing last entry
+ // _nodes[i] = replacement;
+ // }
+ --i; // Visit popped node
+ // If it was last entry, loop terminates since size() was also reduced
+ }
+ }
+}
+
+//=============================================================================
+void Node_Stack::grow() {
+ size_t old_top = pointer_delta(_inode_top,_inodes,sizeof(INode)); // save _top
+ size_t old_max = pointer_delta(_inode_max,_inodes,sizeof(INode));
+ size_t max = old_max << 1; // max * 2
+ _inodes = REALLOC_ARENA_ARRAY(_a, INode, _inodes, old_max, max);
+ _inode_max = _inodes + max;
+ _inode_top = _inodes + old_top; // restore _top
+}
+
+//=============================================================================
+uint TypeNode::size_of() const { return sizeof(*this); }
+#ifndef PRODUCT
+void TypeNode::dump_spec(outputStream *st) const {
+ if( !Verbose && !WizardMode ) {
+ // standard dump does this in Verbose and WizardMode
+ st->print(" #"); _type->dump_on(st);
+ }
+}
+#endif
+uint TypeNode::hash() const {
+ return Node::hash() + _type->hash();
+}
+uint TypeNode::cmp( const Node &n ) const
+{ return !Type::cmp( _type, ((TypeNode&)n)._type ); }
+const Type *TypeNode::bottom_type() const { return _type; }
+const Type *TypeNode::Value( PhaseTransform * ) const { return _type; }
+
+//------------------------------ideal_reg--------------------------------------
+uint TypeNode::ideal_reg() const {
+ return Matcher::base2reg[_type->base()];
+}
diff --git a/src/share/vm/opto/node.hpp b/src/share/vm/opto/node.hpp
new file mode 100644
index 000000000..f93562c09
--- /dev/null
+++ b/src/share/vm/opto/node.hpp
@@ -0,0 +1,1492 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+class AbstractLockNode;
+class AddNode;
+class AddPNode;
+class AliasInfo;
+class AllocateArrayNode;
+class AllocateNode;
+class Block;
+class Block_Array;
+class BoolNode;
+class BoxLockNode;
+class CMoveNode;
+class CallDynamicJavaNode;
+class CallJavaNode;
+class CallLeafNode;
+class CallNode;
+class CallRuntimeNode;
+class CallStaticJavaNode;
+class CatchNode;
+class CatchProjNode;
+class CheckCastPPNode;
+class CmpNode;
+class CodeBuffer;
+class ConstraintCastNode;
+class ConNode;
+class CountedLoopNode;
+class CountedLoopEndNode;
+class FastLockNode;
+class FastUnlockNode;
+class IfNode;
+class InitializeNode;
+class JVMState;
+class JumpNode;
+class JumpProjNode;
+class LoadNode;
+class LoadStoreNode;
+class LockNode;
+class LoopNode;
+class MachCallDynamicJavaNode;
+class MachCallJavaNode;
+class MachCallLeafNode;
+class MachCallNode;
+class MachCallRuntimeNode;
+class MachCallStaticJavaNode;
+class MachIfNode;
+class MachNode;
+class MachNullCheckNode;
+class MachReturnNode;
+class MachSafePointNode;
+class MachSpillCopyNode;
+class MachTempNode;
+class Matcher;
+class MemBarNode;
+class MemNode;
+class MergeMemNode;
+class MulNode;
+class MultiNode;
+class MultiBranchNode;
+class NeverBranchNode;
+class Node;
+class Node_Array;
+class Node_List;
+class Node_Stack;
+class NullCheckNode;
+class OopMap;
+class PCTableNode;
+class PhaseCCP;
+class PhaseGVN;
+class PhaseIterGVN;
+class PhaseRegAlloc;
+class PhaseTransform;
+class PhaseValues;
+class PhiNode;
+class Pipeline;
+class ProjNode;
+class RegMask;
+class RegionNode;
+class RootNode;
+class SafePointNode;
+class StartNode;
+class State;
+class StoreNode;
+class SubNode;
+class Type;
+class TypeNode;
+class UnlockNode;
+class VectorSet;
+class IfTrueNode;
+class IfFalseNode;
+typedef void (*NFunc)(Node&,void*);
+extern "C" {
+ typedef int (*C_sort_func_t)(const void *, const void *);
+}
+
+// The type of all node counts and indexes.
+// It must hold at least 16 bits, but must also be fast to load and store.
+// This type, if less than 32 bits, could limit the number of possible nodes.
+// (To make this type platform-specific, move to globalDefinitions_xxx.hpp.)
+typedef unsigned int node_idx_t;
+
+
+#ifndef OPTO_DU_ITERATOR_ASSERT
+#ifdef ASSERT
+#define OPTO_DU_ITERATOR_ASSERT 1
+#else
+#define OPTO_DU_ITERATOR_ASSERT 0
+#endif
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+#if OPTO_DU_ITERATOR_ASSERT
+class DUIterator;
+class DUIterator_Fast;
+class DUIterator_Last;
+#else
+typedef uint DUIterator;
+typedef Node** DUIterator_Fast;
+typedef Node** DUIterator_Last;
+#endif
+
+// Node Sentinel
+#define NodeSentinel (Node*)-1
+
+// Unknown count frequency
+#define COUNT_UNKNOWN (-1.0f)
+
+//------------------------------Node-------------------------------------------
+// Nodes define actions in the program. They create values, which have types.
+// They are both vertices in a directed graph and program primitives. Nodes
+// are labeled; the label is the "opcode", the primitive function in the lambda
+// calculus sense that gives meaning to the Node. Node inputs are ordered (so
+// that "a-b" is different from "b-a"). The inputs to a Node are the inputs to
+// the Node's function. These inputs also define a Type equation for the Node.
+// Solving these Type equations amounts to doing dataflow analysis.
+// Control and data are uniformly represented in the graph. Finally, Nodes
+// have a unique dense integer index which is used to index into side arrays
+// whenever I have phase-specific information.
+
+class Node {
+ // Lots of restrictions on cloning Nodes
+ Node(const Node&); // not defined; linker error to use these
+ Node &operator=(const Node &rhs);
+
+public:
+ friend class Compile;
+ #if OPTO_DU_ITERATOR_ASSERT
+ friend class DUIterator_Common;
+ friend class DUIterator;
+ friend class DUIterator_Fast;
+ friend class DUIterator_Last;
+ #endif
+
+ // Because Nodes come and go, I define an Arena of Node structures to pull
+ // from. This should allow fast access to node creation & deletion. This
+ // field is a local cache of a value defined in some "program fragment" of
+ // which these Nodes are just a part.
+
+ // New Operator that takes a Compile pointer; this will eventually
+ // be the "new" New operator.
+ inline void* operator new( size_t x, Compile* C) {
+ Node* n = (Node*)C->node_arena()->Amalloc_D(x);
+#ifdef ASSERT
+ n->_in = (Node**)n; // magic cookie for assertion check
+#endif
+ n->_out = (Node**)C;
+ return (void*)n;
+ }
+
+ // New Operator that takes a Compile pointer; this will eventually
+ // be the "new" New operator.
+ inline void* operator new( size_t x, Compile* C, int y) {
+ Node* n = (Node*)C->node_arena()->Amalloc_D(x + y*sizeof(void*));
+ n->_in = (Node**)(((char*)n) + x);
+#ifdef ASSERT
+ n->_in[y-1] = n; // magic cookie for assertion check
+#endif
+ n->_out = (Node**)C;
+ return (void*)n;
+ }
+
+ // Delete is a NOP
+ void operator delete( void *ptr ) {}
+ // Fancy destructor; eagerly attempt to reclaim Node numberings and storage
+ void destruct();
+
+ // Create a new Node. Required is the number of inputs required for
+ // semantic correctness.
+ Node( uint required );
+
+ // Create a new Node with given input edges.
+ // This version requires use of the "edge-count" new.
+ // E.g. new (C,3) FooNode( C, NULL, left, right );
+ Node( Node *n0 );
+ Node( Node *n0, Node *n1 );
+ Node( Node *n0, Node *n1, Node *n2 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3, Node *n4, Node *n5 );
+ Node( Node *n0, Node *n1, Node *n2, Node *n3,
+ Node *n4, Node *n5, Node *n6 );
+
+ // Clone an inherited Node given only the base Node type.
+ Node* clone() const;
+
+ // Clone a Node, immediately supplying one or two new edges.
+ // The first and second arguments, if non-null, replace in(1) and in(2),
+ // respectively.
+ Node* clone_with_data_edge(Node* in1, Node* in2 = NULL) const {
+ Node* nn = clone();
+ if (in1 != NULL) nn->set_req(1, in1);
+ if (in2 != NULL) nn->set_req(2, in2);
+ return nn;
+ }
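+ // For example (illustrative only; 'add' and 'new_left' are hypothetical names):
+ //   Node* nn = add->clone_with_data_edge(new_left);  // copy of 'add' with in(1) replaced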
+
+private:
+ // Shared setup for the above constructors.
+ // Handles all interactions with Compile::current.
+ // Puts initial values in all Node fields except _idx.
+ // Returns the initial value for _idx, which cannot
+ // be initialized by assignment.
+ inline int Init(int req, Compile* C);
+
+//----------------- input edge handling
+protected:
+ friend class PhaseCFG; // Access to address of _in array elements
+ Node **_in; // Array of use-def references to Nodes
+ Node **_out; // Array of def-use references to Nodes
+
+ // Input edges are split into two categories. Required edges are required
+ // for semantic correctness; order is important and NULLs are allowed.
+ // Precedence edges are used to help determine execution order and are
+ // added, e.g., for scheduling purposes. They are unordered and not
+ // duplicated; they have no embedded NULLs. Edges from 0 to _cnt-1
+ // are required, from _cnt to _max-1 are precedence edges.
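+ // For example (illustrative), a node with _cnt == 3 and _max == 5 has
+ // required inputs in(0)..in(2), any of which may be NULL, and up to two
+ // precedence inputs in(3) and in(4).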
+ node_idx_t _cnt; // Total number of required Node inputs.
+
+ node_idx_t _max; // Actual length of input array.
+
+ // Output edges are an unordered list of def-use edges which exactly
+ // correspond to required input edges which point from other nodes
+ // to this one. Thus the count of the output edges is the number of
+ // users of this node.
+ node_idx_t _outcnt; // Total number of Node outputs.
+
+ node_idx_t _outmax; // Actual length of output array.
+
+ // Grow the actual input array to the next larger power-of-2 bigger than len.
+ void grow( uint len );
+ // Grow the output array to the next larger power-of-2 bigger than len.
+ void out_grow( uint len );
+
+ public:
+ // Each Node is assigned a unique small/dense number. This number is used
+ // to index into auxiliary arrays of data and bitvectors.
+ // It is declared const to defend against inadvertent assignment,
+ // since it is used by clients as a naked field.
+ const node_idx_t _idx;
+
+ // Get the (read-only) number of input edges
+ uint req() const { return _cnt; }
+ uint len() const { return _max; }
+ // Get the (read-only) number of output edges
+ uint outcnt() const { return _outcnt; }
+
+#if OPTO_DU_ITERATOR_ASSERT
+ // Iterate over the out-edges of this node. Deletions are illegal.
+ inline DUIterator outs() const;
+ // Use this when the out array might have changed to suppress asserts.
+ inline DUIterator& refresh_out_pos(DUIterator& i) const;
+ // Does the node have an out at this position? (Used for iteration.)
+ inline bool has_out(DUIterator& i) const;
+ inline Node* out(DUIterator& i) const;
+ // Iterate over the out-edges of this node. All changes are illegal.
+ inline DUIterator_Fast fast_outs(DUIterator_Fast& max) const;
+ inline Node* fast_out(DUIterator_Fast& i) const;
+ // Iterate over the out-edges of this node, deleting one at a time.
+ inline DUIterator_Last last_outs(DUIterator_Last& min) const;
+ inline Node* last_out(DUIterator_Last& i) const;
+ // The inline bodies of all these methods are after the iterator definitions.
+#else
+ // Iterate over the out-edges of this node. Deletions are illegal.
+ // This iteration uses integral indexes, to decouple from array reallocations.
+ DUIterator outs() const { return 0; }
+ // Use this when the out array might have changed to suppress asserts.
+ DUIterator refresh_out_pos(DUIterator i) const { return i; }
+
+ // Reference to the i'th output Node. Error if out of bounds.
+ Node* out(DUIterator i) const { assert(i < _outcnt, "oob"); return _out[i]; }
+ // Does the node have an out at this position? (Used for iteration.)
+ bool has_out(DUIterator i) const { return i < _outcnt; }
+
+ // Iterate over the out-edges of this node. All changes are illegal.
+ // This iteration uses a pointer internal to the out array.
+ DUIterator_Fast fast_outs(DUIterator_Fast& max) const {
+ Node** out = _out;
+ // Assign a limit pointer to the reference argument:
+ max = out + (ptrdiff_t)_outcnt;
+ // Return the base pointer:
+ return out;
+ }
+ Node* fast_out(DUIterator_Fast i) const { return *i; }
+ // Iterate over the out-edges of this node, deleting one at a time.
+ // This iteration uses a pointer internal to the out array.
+ DUIterator_Last last_outs(DUIterator_Last& min) const {
+ Node** out = _out;
+ // Assign a limit pointer to the reference argument:
+ min = out;
+ // Return the pointer to the start of the iteration:
+ return out + (ptrdiff_t)_outcnt - 1;
+ }
+ Node* last_out(DUIterator_Last i) const { return *i; }
+#endif
+
+ // Reference to the i'th input Node. Error if out of bounds.
+ Node* in(uint i) const { assert(i < _max,"oob"); return _in[i]; }
+ // Reference to the i'th output Node. Error if out of bounds.
+ // Use this accessor sparingly. We are going to try to use iterators instead.
+ Node* raw_out(uint i) const { assert(i < _outcnt,"oob"); return _out[i]; }
+ // Return the unique out edge.
+ Node* unique_out() const { assert(_outcnt==1,"not unique"); return _out[0]; }
+ // Delete out edge at position 'i' by moving last out edge to position 'i'
+ void raw_del_out(uint i) {
+ assert(i < _outcnt,"oob");
+ assert(_outcnt > 0,"oob");
+ #if OPTO_DU_ITERATOR_ASSERT
+ // Record that a change happened here.
+ debug_only(_last_del = _out[i]; ++_del_tick);
+ #endif
+ _out[i] = _out[--_outcnt];
+ // Smash the old edge so it can't be used accidentally.
+ debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
+ }
+
+#ifdef ASSERT
+ bool is_dead() const;
+#define is_not_dead(n) ((n) == NULL || !VerifyIterativeGVN || !((n)->is_dead()))
+#endif
+
+ // Set a required input edge, also updates corresponding output edge
+ void add_req( Node *n ); // Append a NEW required input
+ void add_req_batch( Node* n, uint m ); // Append m NEW required inputs (all n).
+ void del_req( uint idx ); // Delete required edge & compact
+ void ins_req( uint i, Node *n ); // Insert a NEW required input
+ void set_req( uint i, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( i < _cnt, "oob");
+ assert( !VerifyHashTableKeys || _hash_lock == 0,
+ "remove node from hash table before modifying it");
+ Node** p = &_in[i]; // cache this._in, across the del_out call
+ if (*p != NULL) (*p)->del_out((Node *)this);
+ (*p) = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Light version of set_req() to init inputs after node creation.
+ void init_req( uint i, Node *n ) {
+ assert( i == 0 && this == n ||
+ is_not_dead(n), "can not use dead node");
+ assert( i < _cnt, "oob");
+ assert( !VerifyHashTableKeys || _hash_lock == 0,
+ "remove node from hash table before modifying it");
+ assert( _in[i] == NULL, "sanity");
+ _in[i] = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Find first occurrence of n among my edges:
+ int find_edge(Node* n);
+ int replace_edge(Node* old, Node* neww);
+ // NULL out all inputs to eliminate incoming Def-Use edges.
+ // Return the number of edges between 'n' and 'this'
+ int disconnect_inputs(Node *n);
+
+ // Quickly, return true if and only if I am Compile::current()->top().
+ bool is_top() const {
+ assert((this == (Node*) Compile::current()->top()) == (_out == NULL), "");
+ return (_out == NULL);
+ }
+ // Reaffirm invariants for is_top. (Only from Compile::set_cached_top_node.)
+ void setup_is_top();
+
+ // Strip away casting. (It is depth-limited.)
+ Node* uncast() const;
+
+private:
+ static Node* uncast_helper(const Node* n);
+
+ // Add an output edge to the end of the list
+ void add_out( Node *n ) {
+ if (is_top()) return;
+ if( _outcnt == _outmax ) out_grow(_outcnt);
+ _out[_outcnt++] = n;
+ }
+ // Delete an output edge
+ void del_out( Node *n ) {
+ if (is_top()) return;
+ Node** outp = &_out[_outcnt];
+ // Find and remove n
+ do {
+ assert(outp > _out, "Missing Def-Use edge");
+ } while (*--outp != n);
+ *outp = _out[--_outcnt];
+ // Smash the old edge so it can't be used accidentally.
+ debug_only(_out[_outcnt] = (Node *)(uintptr_t)0xdeadbeef);
+ // Record that a change happened here.
+ #if OPTO_DU_ITERATOR_ASSERT
+ debug_only(_last_del = n; ++_del_tick);
+ #endif
+ }
+
+public:
+ // Globally replace this node by a given new node, updating all uses.
+ void replace_by(Node* new_node);
+ void set_req_X( uint i, Node *n, PhaseIterGVN *igvn );
+ // Find the one non-null required input. RegionNode only
+ Node *nonnull_req() const;
+ // Add or remove precedence edges
+ void add_prec( Node *n );
+ void rm_prec( uint i );
+ void set_prec( uint i, Node *n ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( i >= _cnt, "not a precedence edge");
+ if (_in[i] != NULL) _in[i]->del_out((Node *)this);
+ _in[i] = n;
+ if (n != NULL) n->add_out((Node *)this);
+ }
+ // Set this node's index, used by cisc_version to replace current node
+ void set_idx(uint new_idx) {
+ const node_idx_t* ref = &_idx;
+ *(node_idx_t*)ref = new_idx;
+ }
+ // Swap input edge order. (Edge indexes i1 and i2 are usually 1 and 2.)
+ void swap_edges(uint i1, uint i2) {
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ // Def-Use info is unchanged
+ Node* n1 = in(i1);
+ Node* n2 = in(i2);
+ _in[i1] = n2;
+ _in[i2] = n1;
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "edge swap must preserve hash code");
+ }
+
+ // Iterators over input Nodes for a Node X are written as:
+ // for( i = 0; i < X.req(); i++ ) ... X[i] ...
+ // NOTE: Required edges can contain embedded NULL pointers.
+
+//----------------- Other Node Properties
+
+ // Generate class id for some ideal nodes to avoid virtual query
+ // methods is_<Node>().
+ // Class id is the set of bits corresponding to the node class and all its
+ // super classes, so that queries for super classes are also valid.
+ // Subclasses of the same super class have a different assigned bit
+ // (the third parameter in the macro DEFINE_CLASS_ID).
+ // Classes with deeper hierarchy are declared first.
+ // Classes with the same hierarchy depth are sorted by usage frequency.
+ //
+ // The query method masks the bits to cut off bits of subclasses
+ // and then compare the result with the class id
+ // (see the macro DEFINE_CLASS_QUERY below).
+ //
+ // Class_MachCall=30, ClassMask_MachCall=31
+ // 12 8 4 0
+ // 0 0 0 0 0 0 0 0 1 1 1 1 0
+ // | | | |
+ // | | | Bit_Mach=2
+ // | | Bit_MachReturn=4
+ // | Bit_MachSafePoint=8
+ // Bit_MachCall=16
+ //
+ // Class_CountedLoop=56, ClassMask_CountedLoop=63
+ // 12 8 4 0
+ // 0 0 0 0 0 0 0 1 1 1 0 0 0
+ // | | |
+ // | | Bit_Region=8
+ // | Bit_Loop=16
+ // Bit_CountedLoop=32
+
+ #define DEFINE_CLASS_ID(cl, supcl, subn) \
+ Bit_##cl = (Class_##supcl == 0) ? 1 << subn : (Bit_##supcl) << (1 + subn) , \
+ Class_##cl = Class_##supcl + Bit_##cl , \
+ ClassMask_##cl = ((Bit_##cl << 1) - 1) ,
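+ // For example (illustrative), DEFINE_CLASS_ID(Mach, Node, 1) yields
+ //   Bit_Mach = 1 << 1 = 2, Class_Mach = Class_Node + Bit_Mach = 2,
+ //   ClassMask_Mach = (Bit_Mach << 1) - 1 = 3,
+ // matching the Bit_Mach=2 row in the diagram above.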
+
+ // This enum is used only for C2 ideal and mach nodes with is_<node>() methods
+ // so that its values fit into 16 bits.
+ enum NodeClasses {
+ Bit_Node = 0x0000,
+ Class_Node = 0x0000,
+ ClassMask_Node = 0xFFFF,
+
+ DEFINE_CLASS_ID(Multi, Node, 0)
+ DEFINE_CLASS_ID(SafePoint, Multi, 0)
+ DEFINE_CLASS_ID(Call, SafePoint, 0)
+ DEFINE_CLASS_ID(CallJava, Call, 0)
+ DEFINE_CLASS_ID(CallStaticJava, CallJava, 0)
+ DEFINE_CLASS_ID(CallDynamicJava, CallJava, 1)
+ DEFINE_CLASS_ID(CallRuntime, Call, 1)
+ DEFINE_CLASS_ID(CallLeaf, CallRuntime, 0)
+ DEFINE_CLASS_ID(Allocate, Call, 2)
+ DEFINE_CLASS_ID(AllocateArray, Allocate, 0)
+ DEFINE_CLASS_ID(AbstractLock, Call, 3)
+ DEFINE_CLASS_ID(Lock, AbstractLock, 0)
+ DEFINE_CLASS_ID(Unlock, AbstractLock, 1)
+ DEFINE_CLASS_ID(MultiBranch, Multi, 1)
+ DEFINE_CLASS_ID(PCTable, MultiBranch, 0)
+ DEFINE_CLASS_ID(Catch, PCTable, 0)
+ DEFINE_CLASS_ID(Jump, PCTable, 1)
+ DEFINE_CLASS_ID(If, MultiBranch, 1)
+ DEFINE_CLASS_ID(CountedLoopEnd, If, 0)
+ DEFINE_CLASS_ID(NeverBranch, MultiBranch, 2)
+ DEFINE_CLASS_ID(Start, Multi, 2)
+ DEFINE_CLASS_ID(MemBar, Multi, 3)
+ DEFINE_CLASS_ID(Initialize, MemBar, 0)
+
+ DEFINE_CLASS_ID(Mach, Node, 1)
+ DEFINE_CLASS_ID(MachReturn, Mach, 0)
+ DEFINE_CLASS_ID(MachSafePoint, MachReturn, 0)
+ DEFINE_CLASS_ID(MachCall, MachSafePoint, 0)
+ DEFINE_CLASS_ID(MachCallJava, MachCall, 0)
+ DEFINE_CLASS_ID(MachCallStaticJava, MachCallJava, 0)
+ DEFINE_CLASS_ID(MachCallDynamicJava, MachCallJava, 1)
+ DEFINE_CLASS_ID(MachCallRuntime, MachCall, 1)
+ DEFINE_CLASS_ID(MachCallLeaf, MachCallRuntime, 0)
+ DEFINE_CLASS_ID(MachSpillCopy, Mach, 1)
+ DEFINE_CLASS_ID(MachNullCheck, Mach, 2)
+ DEFINE_CLASS_ID(MachIf, Mach, 3)
+ DEFINE_CLASS_ID(MachTemp, Mach, 4)
+
+ DEFINE_CLASS_ID(Proj, Node, 2)
+ DEFINE_CLASS_ID(CatchProj, Proj, 0)
+ DEFINE_CLASS_ID(JumpProj, Proj, 1)
+ DEFINE_CLASS_ID(IfTrue, Proj, 2)
+ DEFINE_CLASS_ID(IfFalse, Proj, 3)
+
+ DEFINE_CLASS_ID(Region, Node, 3)
+ DEFINE_CLASS_ID(Loop, Region, 0)
+ DEFINE_CLASS_ID(Root, Loop, 0)
+ DEFINE_CLASS_ID(CountedLoop, Loop, 1)
+
+ DEFINE_CLASS_ID(Sub, Node, 4)
+ DEFINE_CLASS_ID(Cmp, Sub, 0)
+ DEFINE_CLASS_ID(FastLock, Cmp, 0)
+ DEFINE_CLASS_ID(FastUnlock, Cmp, 1)
+
+ DEFINE_CLASS_ID(Type, Node, 5)
+ DEFINE_CLASS_ID(Phi, Type, 0)
+ DEFINE_CLASS_ID(ConstraintCast, Type, 1)
+ DEFINE_CLASS_ID(CheckCastPP, Type, 2)
+ DEFINE_CLASS_ID(CMove, Type, 3)
+
+ DEFINE_CLASS_ID(Mem, Node, 6)
+ DEFINE_CLASS_ID(Load, Mem, 0)
+ DEFINE_CLASS_ID(Store, Mem, 1)
+ DEFINE_CLASS_ID(LoadStore, Mem, 2)
+
+ DEFINE_CLASS_ID(MergeMem, Node, 7)
+ DEFINE_CLASS_ID(Bool, Node, 8)
+ DEFINE_CLASS_ID(AddP, Node, 9)
+ DEFINE_CLASS_ID(BoxLock, Node, 10)
+ DEFINE_CLASS_ID(Add, Node, 11)
+ DEFINE_CLASS_ID(Mul, Node, 12)
+
+ _max_classes = ClassMask_Mul
+ };
+ #undef DEFINE_CLASS_ID
+
+ // Flags are sorted by usage frequency.
+ enum NodeFlags {
+ Flag_is_Copy = 0x01, // should be first bit to avoid shift
+ Flag_is_Call = Flag_is_Copy << 1,
+ Flag_rematerialize = Flag_is_Call << 1,
+ Flag_needs_anti_dependence_check = Flag_rematerialize << 1,
+ Flag_is_macro = Flag_needs_anti_dependence_check << 1,
+ Flag_is_Con = Flag_is_macro << 1,
+ Flag_is_cisc_alternate = Flag_is_Con << 1,
+ Flag_is_Branch = Flag_is_cisc_alternate << 1,
+ Flag_is_block_start = Flag_is_Branch << 1,
+ Flag_is_Goto = Flag_is_block_start << 1,
+ Flag_is_dead_loop_safe = Flag_is_Goto << 1,
+ Flag_may_be_short_branch = Flag_is_dead_loop_safe << 1,
+ Flag_is_safepoint_node = Flag_may_be_short_branch << 1,
+ Flag_is_pc_relative = Flag_is_safepoint_node << 1,
+ Flag_is_Vector = Flag_is_pc_relative << 1,
+ _max_flags = (Flag_is_Vector << 1) - 1 // allow flags combination
+ };
+
+private:
+ jushort _class_id;
+ jushort _flags;
+
+protected:
+ // These methods should be called from constructors only.
+ void init_class_id(jushort c) {
+ assert(c <= _max_classes, "invalid node class");
+ _class_id = c; // cast out const
+ }
+ void init_flags(jushort fl) {
+ assert(fl <= _max_flags, "invalid node flag");
+ _flags |= fl;
+ }
+ void clear_flag(jushort fl) {
+ assert(fl <= _max_flags, "invalid node flag");
+ _flags &= ~fl;
+ }
+
+public:
+ const jushort class_id() const { return _class_id; }
+
+ const jushort flags() const { return _flags; }
+
+ // Return a dense integer opcode number
+ virtual int Opcode() const;
+
+ // Virtual inherited Node size
+ virtual uint size_of() const;
+
+ // Other interesting Node properties
+
+ // Special case: is_Call() returns true for both CallNode and MachCallNode.
+ bool is_Call() const {
+ return (_flags & Flag_is_Call) != 0;
+ }
+
+ CallNode *as_Call() const { // Only for CallNode (not for MachCallNode)
+ assert((_class_id & ClassMask_Call) == Class_Call, "invalid node class");
+ return (CallNode*)this;
+ }
+
+ #define DEFINE_CLASS_QUERY(type) \
+ bool is_##type() const { \
+ return ((_class_id & ClassMask_##type) == Class_##type); \
+ } \
+ type##Node *as_##type() const { \
+ assert(is_##type(), "invalid node class"); \
+ return (type##Node*)this; \
+ }
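+ // For example (illustrative), DEFINE_CLASS_QUERY(Region) expands to:
+ //   bool is_Region() const { return (_class_id & ClassMask_Region) == Class_Region; }
+ //   RegionNode* as_Region() const { assert(is_Region(), "invalid node class"); return (RegionNode*)this; }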
+
+ DEFINE_CLASS_QUERY(AbstractLock)
+ DEFINE_CLASS_QUERY(Add)
+ DEFINE_CLASS_QUERY(AddP)
+ DEFINE_CLASS_QUERY(Allocate)
+ DEFINE_CLASS_QUERY(AllocateArray)
+ DEFINE_CLASS_QUERY(Bool)
+ DEFINE_CLASS_QUERY(BoxLock)
+ DEFINE_CLASS_QUERY(CallDynamicJava)
+ DEFINE_CLASS_QUERY(CallJava)
+ DEFINE_CLASS_QUERY(CallLeaf)
+ DEFINE_CLASS_QUERY(CallRuntime)
+ DEFINE_CLASS_QUERY(CallStaticJava)
+ DEFINE_CLASS_QUERY(Catch)
+ DEFINE_CLASS_QUERY(CatchProj)
+ DEFINE_CLASS_QUERY(CheckCastPP)
+ DEFINE_CLASS_QUERY(ConstraintCast)
+ DEFINE_CLASS_QUERY(CMove)
+ DEFINE_CLASS_QUERY(Cmp)
+ DEFINE_CLASS_QUERY(CountedLoop)
+ DEFINE_CLASS_QUERY(CountedLoopEnd)
+ DEFINE_CLASS_QUERY(FastLock)
+ DEFINE_CLASS_QUERY(FastUnlock)
+ DEFINE_CLASS_QUERY(If)
+ DEFINE_CLASS_QUERY(IfFalse)
+ DEFINE_CLASS_QUERY(IfTrue)
+ DEFINE_CLASS_QUERY(Initialize)
+ DEFINE_CLASS_QUERY(Jump)
+ DEFINE_CLASS_QUERY(JumpProj)
+ DEFINE_CLASS_QUERY(Load)
+ DEFINE_CLASS_QUERY(LoadStore)
+ DEFINE_CLASS_QUERY(Lock)
+ DEFINE_CLASS_QUERY(Loop)
+ DEFINE_CLASS_QUERY(Mach)
+ DEFINE_CLASS_QUERY(MachCall)
+ DEFINE_CLASS_QUERY(MachCallDynamicJava)
+ DEFINE_CLASS_QUERY(MachCallJava)
+ DEFINE_CLASS_QUERY(MachCallLeaf)
+ DEFINE_CLASS_QUERY(MachCallRuntime)
+ DEFINE_CLASS_QUERY(MachCallStaticJava)
+ DEFINE_CLASS_QUERY(MachIf)
+ DEFINE_CLASS_QUERY(MachNullCheck)
+ DEFINE_CLASS_QUERY(MachReturn)
+ DEFINE_CLASS_QUERY(MachSafePoint)
+ DEFINE_CLASS_QUERY(MachSpillCopy)
+ DEFINE_CLASS_QUERY(MachTemp)
+ DEFINE_CLASS_QUERY(Mem)
+ DEFINE_CLASS_QUERY(MemBar)
+ DEFINE_CLASS_QUERY(MergeMem)
+ DEFINE_CLASS_QUERY(Mul)
+ DEFINE_CLASS_QUERY(Multi)
+ DEFINE_CLASS_QUERY(MultiBranch)
+ DEFINE_CLASS_QUERY(PCTable)
+ DEFINE_CLASS_QUERY(Phi)
+ DEFINE_CLASS_QUERY(Proj)
+ DEFINE_CLASS_QUERY(Region)
+ DEFINE_CLASS_QUERY(Root)
+ DEFINE_CLASS_QUERY(SafePoint)
+ DEFINE_CLASS_QUERY(Start)
+ DEFINE_CLASS_QUERY(Store)
+ DEFINE_CLASS_QUERY(Sub)
+ DEFINE_CLASS_QUERY(Type)
+ DEFINE_CLASS_QUERY(Unlock)
+
+ #undef DEFINE_CLASS_QUERY
+
+ // duplicate of is_MachSpillCopy()
+ bool is_SpillCopy () const {
+ return ((_class_id & ClassMask_MachSpillCopy) == Class_MachSpillCopy);
+ }
+
+ bool is_Con () const { return (_flags & Flag_is_Con) != 0; }
+ bool is_Goto() const { return (_flags & Flag_is_Goto) != 0; }
+ // A data node which is safe to leave in a dead loop during IGVN optimization.
+ bool is_dead_loop_safe() const {
+ return is_Phi() || is_Proj() ||
+ (_flags & (Flag_is_dead_loop_safe | Flag_is_Con)) != 0;
+ }
+
+ // is_Copy() returns copied edge index (0 or 1)
+ uint is_Copy() const { return (_flags & Flag_is_Copy); }
+
+ virtual bool is_CFG() const { return false; }
+
+ // If this node is control-dependent on a test, can it be
+ // rerouted to a dominating equivalent test? This is usually
+ // true of non-CFG nodes, but can be false for operations which
+ // depend for their correct sequencing on more than one test.
+ // (In that case, hoisting to a dominating test may silently
+ // skip some other important test.)
+ virtual bool depends_only_on_test() const { assert(!is_CFG(), ""); return true; };
+
+ // defined for MachNodes that match 'If' | 'Goto' | 'CountedLoopEnd'
+ bool is_Branch() const { return (_flags & Flag_is_Branch) != 0; }
+
+ // When building basic blocks, I need to have a notion of block beginning
+ // Nodes, next block selector Nodes (block enders), and next block
+ // projections. These calls need to work on their machine equivalents. The
+ // Ideal beginning Nodes are RootNode, RegionNode and StartNode.
+ bool is_block_start() const {
+ if ( is_Region() )
+ return this == (const Node*)in(0);
+ else
+ return (_flags & Flag_is_block_start) != 0;
+ }
+
+ // The Ideal control projection Nodes are IfTrue/IfFalse, JumpProjNode, Root,
+ // Goto and Return. This call also returns the block ending Node.
+ virtual const Node *is_block_proj() const;
+
+ // The node is a "macro" node which needs to be expanded before matching
+ bool is_macro() const { return (_flags & Flag_is_macro) != 0; }
+
+ // Value is a vector of primitive values
+ bool is_Vector() const { return (_flags & Flag_is_Vector) != 0; }
+
+//----------------- Optimization
+
+ // Get the worst-case Type output for this Node.
+ virtual const class Type *bottom_type() const;
+
+ // If we find a better type for a node, try to record it permanently.
+ // Return true if this node actually changed.
+ // Be sure to do the hash_delete game in the "rehash" variant.
+ void raise_bottom_type(const Type* new_type);
+
+ // Get the address type with which this node uses and/or defs memory,
+ // or NULL if none. The address type is conservatively wide.
+ // Returns non-null for calls, membars, loads, stores, etc.
+ // Returns TypePtr::BOTTOM if the node touches memory "broadly".
+ virtual const class TypePtr *adr_type() const { return NULL; }
+
+ // Return an existing node which computes the same function as this node.
+ // The optimistic combined algorithm requires this to return a Node which
+ // is a small number of steps away (e.g., one of my inputs).
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Return the set of values this Node can take on at runtime.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Return a node which is more "ideal" than the current node.
+ // The invariants on this call are subtle. If in doubt, read the
+ // treatise in node.cpp above the default implementation AND TEST WITH
+ // +VerifyIterativeGVN!
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+
+ // Some nodes have specific Ideal subgraph transformations only if they are
+ // unique users of specific nodes. Such nodes should be put on IGVN worklist
+ // for the transformations to happen.
+ bool has_special_unique_user() const;
+
+protected:
+ bool remove_dead_region(PhaseGVN *phase, bool can_reshape);
+public:
+
+ // Idealize graph, using DU info. Done after constant propagation
+ virtual Node *Ideal_DU_postCCP( PhaseCCP *ccp );
+
+ // See if there is valid pipeline info
+ static const Pipeline *pipeline_class();
+ virtual const Pipeline *pipeline() const;
+
+ // Compute the latency from the def to this instruction of the ith input node
+ uint latency(uint i);
+
+ // Hash & compare functions, for pessimistic value numbering
+
+ // If the hash function returns the special sentinel value NO_HASH,
+ // the node is guaranteed never to compare equal to any other node.
+ // If we accidentally generate a hash with value NO_HASH, the node
+ // won't go into the table and we'll lose a little optimization.
+ enum { NO_HASH = 0 };
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+
+ // Operation appears to be iteratively computed (such as an induction variable)
+ // It is possible for this operation to return false for a loop-varying
+ // value, if it appears (by local graph inspection) to be computed by a simple conditional.
+ bool is_iteratively_computed();
+
+ // Determine if a node is a Counted loop induction variable.
+ // The method is defined in loopnode.cpp.
+ const Node* is_loop_iv() const;
+
+ // Return a node with opcode "opc" and same inputs as "this" if one can
+ // be found; otherwise return NULL.
+ Node* find_similar(int opc);
+
+ // Return the unique control out if only one. Null if none or more than one.
+ Node* unique_ctrl_out();
+
+//----------------- Code Generation
+
+ // Ideal register class for Matching. Zero means unmatched instruction
+ // (these are cloned instead of converted to machine nodes).
+ virtual uint ideal_reg() const;
+
+ static const uint NotAMachineReg; // must be > max. machine register
+
+ // Do we Match on this edge index or not? Generally false for Control
+ // and true for everything else. Weird for calls & returns.
+ virtual uint match_edge(uint idx) const;
+
+ // Register class output is returned in
+ virtual const RegMask &out_RegMask() const;
+ // Register class input is expected in
+ virtual const RegMask &in_RegMask(uint) const;
+ // Should we clone rather than spill this instruction?
+ bool rematerialize() const;
+
+ // Return JVM State Object if this Node carries debug info, or NULL otherwise
+ virtual JVMState* jvms() const;
+
+ // Print as assembly
+ virtual void format( PhaseRegAlloc *, outputStream* st = tty ) const;
+ // Emit bytes starting at parameter 'ptr'
+ // Bump 'ptr' by the number of output bytes
+ virtual void emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const;
+ // Size of instruction in bytes
+ virtual uint size(PhaseRegAlloc *ra_) const;
+
+ // Convenience function to extract an integer constant from a node.
+ // If it is not an integer constant (either Con, CastII, or Mach),
+ // return value_if_unknown.
+ jint find_int_con(jint value_if_unknown) const {
+ const TypeInt* t = find_int_type();
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ // Return the constant, knowing it is an integer constant already
+ jint get_int() const {
+ const TypeInt* t = find_int_type();
+ guarantee(t != NULL, "must be con");
+ return t->get_con();
+ }
+ // Here's where the work is done. Can produce non-constant int types too.
+ const TypeInt* find_int_type() const;
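+ // For example (illustrative; 'n' is a hypothetical node):
+ //   jint shift = n->in(2)->find_int_con(-1);
+ //   if (shift == -1) { /* in(2) is not a known integer constant */ }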
+
+ // Same thing for long (and intptr_t, via type.hpp):
+ jlong get_long() const {
+ const TypeLong* t = find_long_type();
+ guarantee(t != NULL, "must be con");
+ return t->get_con();
+ }
+ jlong find_long_con(jint value_if_unknown) const {
+ const TypeLong* t = find_long_type();
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ const TypeLong* find_long_type() const;
+
+ // These guys are called by code generated by ADLC:
+ intptr_t get_ptr() const;
+ jdouble getd() const;
+ jfloat getf() const;
+
+ // Nodes which are pinned into basic blocks
+ virtual bool pinned() const { return false; }
+
+ // Nodes which use memory without consuming it, hence need antidependences
+ // More specifically, needs_anti_dependence_check returns true iff the node
+ // (a) does a load, and (b) does not perform a store (except perhaps to a
+ // stack slot or some other unaliased location).
+ bool needs_anti_dependence_check() const;
+
+ // Return which operand this instruction may cisc-spill. In other words,
+ // return operand position that can convert from reg to memory access
+ virtual int cisc_operand() const { return AdlcVMDeps::Not_cisc_spillable; }
+ bool is_cisc_alternate() const { return (_flags & Flag_is_cisc_alternate) != 0; }
+
+//----------------- Graph walking
+public:
+ // Walk and apply member functions recursively.
+ // Supplied (this) pointer is root.
+ void walk(NFunc pre, NFunc post, void *env);
+ static void nop(Node &, void*); // Dummy empty function
+ static void packregion( Node &n, void* );
+private:
+ void walk_(NFunc pre, NFunc post, void *env, VectorSet &visited);
+
+//----------------- Printing, etc
+public:
+#ifndef PRODUCT
+ Node* find(int idx) const; // Search the graph for the given idx.
+ Node* find_ctrl(int idx) const; // Search control ancestors for the given idx.
+ void dump() const; // Print this node,
+ void dump(int depth) const; // Print this node, recursively to depth d
+ void dump_ctrl(int depth) const; // Print control nodes, to depth d
+ virtual void dump_req() const; // Print required-edge info
+ virtual void dump_prec() const; // Print precedence-edge info
+ virtual void dump_out() const; // Print the output edge info
+ virtual void dump_spec(outputStream *st) const {}; // Print per-node info
+ void verify_edges(Unique_Node_List &visited); // Verify bi-directional edges
+ void verify() const; // Check Def-Use info for my subgraph
+ static void verify_recur(const Node *n, int verify_depth, VectorSet &old_space, VectorSet &new_space);
+
+ // This call defines a class-unique string used to identify class instances
+ virtual const char *Name() const;
+
+ void dump_format(PhaseRegAlloc *ra) const; // debug access to MachNode::format(...)
+ // RegMask Print Functions
+ void dump_in_regmask(int idx) { in_RegMask(idx).dump(); }
+ void dump_out_regmask() { out_RegMask().dump(); }
+ static int _in_dump_cnt;
+ static bool in_dump() { return _in_dump_cnt > 0; }
+ void fast_dump() const {
+ tty->print("%4d: %-17s", _idx, Name());
+ for (uint i = 0; i < len(); i++)
+ if (in(i))
+ tty->print(" %4d", in(i)->_idx);
+ else
+ tty->print(" NULL");
+ tty->print("\n");
+ }
+#endif
+#ifdef ASSERT
+ void verify_construction();
+ bool verify_jvms(const JVMState* jvms) const;
+ int _debug_idx; // Unique value assigned to every node.
+ int debug_idx() const { return _debug_idx; }
+ void set_debug_idx( int debug_idx ) { _debug_idx = debug_idx; }
+
+ Node* _debug_orig; // Original version of this, if any.
+ Node* debug_orig() const { return _debug_orig; }
+ void set_debug_orig(Node* orig); // _debug_orig = orig
+
+ int _hash_lock; // Barrier to modifications of nodes in the hash table
+ void enter_hash_lock() { ++_hash_lock; assert(_hash_lock < 99, "in too many hash tables?"); }
+ void exit_hash_lock() { --_hash_lock; assert(_hash_lock >= 0, "mispaired hash locks"); }
+
+ static void init_NodeProperty();
+
+ #if OPTO_DU_ITERATOR_ASSERT
+ const Node* _last_del; // The last deleted node.
+ uint _del_tick; // Bumped when a deletion happens.
+ #endif
+#endif
+};
+
+//-----------------------------------------------------------------------------
+// Iterators over DU info, and associated Node functions.
+
+#if OPTO_DU_ITERATOR_ASSERT
+
+// Common code for assertion checking on DU iterators.
+class DUIterator_Common VALUE_OBJ_CLASS_SPEC {
+#ifdef ASSERT
+ protected:
+ bool _vdui; // cached value of VerifyDUIterators
+ const Node* _node; // the node containing the _out array
+ uint _outcnt; // cached node->_outcnt
+ uint _del_tick; // cached node->_del_tick
+ Node* _last; // last value produced by the iterator
+
+ void sample(const Node* node); // used by c'tor to set up for verifies
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_resync();
+ void reset(const DUIterator_Common& that);
+
+// The VDUI_ONLY macro protects code conditionalized on VerifyDUIterators
+ #define I_VDUI_ONLY(i,x) { if ((i)._vdui) { x; } }
+#else
+ #define I_VDUI_ONLY(i,x) { }
+#endif //ASSERT
+};
+
+#define VDUI_ONLY(x) I_VDUI_ONLY(*this, x)
+
+// Default DU iterator. Allows appends onto the out array.
+// Allows deletion from the out array only at the current point.
+// Usage:
+// for (DUIterator i = x->outs(); x->has_out(i); i++) {
+// Node* y = x->out(i);
+// ...
+// }
+// Compiles in product mode to an unsigned integer index, which indexes
+// onto a repeatedly reloaded base pointer of x->_out. The loop predicate
+// also reloads x->_outcnt. If you delete, you must perform "--i" just
+// before continuing the loop. You must delete only the last-produced
+// edge. You must delete only a single copy of the last-produced edge,
+// or else you must delete all copies at once (the first time the edge
+// is produced by the iterator).
+class DUIterator : public DUIterator_Common {
+ friend class Node;
+
+ // This is the index which provides the product-mode behavior.
+ // Whatever the product-mode version of the system does to the
+ // DUI index is done to this index. All other fields in
+ // this class are used only for assertion checking.
+ uint _idx;
+
+ #ifdef ASSERT
+ uint _refresh_tick; // Records the refresh activity.
+
+ void sample(const Node* node); // Initialize _refresh_tick etc.
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_increment(); // Verify an increment operation.
+ void verify_resync(); // Verify that we can back up over a deletion.
+ void verify_finish(); // Verify that the loop terminated properly.
+ void refresh(); // Resample verification info.
+ void reset(const DUIterator& that); // Resample after assignment.
+ #endif
+
+ DUIterator(const Node* node, int dummy_to_avoid_conversion)
+ { _idx = 0; debug_only(sample(node)); }
+
+ public:
+ // initialize to garbage; clear _vdui to disable asserts
+ DUIterator()
+ { /*initialize to garbage*/ debug_only(_vdui = false); }
+
+ void operator++(int dummy_to_specify_postfix_op)
+ { _idx++; VDUI_ONLY(verify_increment()); }
+
+ void operator--()
+ { VDUI_ONLY(verify_resync()); --_idx; }
+
+ ~DUIterator()
+ { VDUI_ONLY(verify_finish()); }
+
+ void operator=(const DUIterator& that)
+ { _idx = that._idx; debug_only(reset(that)); }
+};
+
+DUIterator Node::outs() const
+ { return DUIterator(this, 0); }
+DUIterator& Node::refresh_out_pos(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.refresh()); return i; }
+bool Node::has_out(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.verify(this,true));return i._idx < _outcnt; }
+Node* Node::out(DUIterator& i) const
+ { I_VDUI_ONLY(i, i.verify(this)); return debug_only(i._last=) _out[i._idx]; }
+
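+// A minimal sketch of the deletion contract described above (illustrative
+// only; should_rewire(), z and the input position j are placeholders):
+//
+//   for (DUIterator i = x->outs(); x->has_out(i); i++) {
+//     Node* y = x->out(i);
+//     if (should_rewire(y)) {
+//       y->set_req(j, z);  // here y->in(j) == x, so one x->y out edge goes away
+//       --i;               // back up over the just-deleted edge
+//     }
+//   }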
+
+// Faster DU iterator. Disallows insertions into the out array.
+// Allows deletion from the out array only at the current point.
+// Usage:
+// for (DUIterator_Fast imax, i = x->fast_outs(imax); i < imax; i++) {
+// Node* y = x->fast_out(i);
+// ...
+// }
+// Compiles in product mode to raw Node** pointer arithmetic, with
+// no reloading of pointers from the original node x. If you delete,
+// you must perform "--i; --imax" just before continuing the loop.
+// If you delete multiple copies of the same edge, you must decrement
+// imax, but not i, multiple times: "--i, imax -= num_edges".
+class DUIterator_Fast : public DUIterator_Common {
+ friend class Node;
+ friend class DUIterator_Last;
+
+ // This is the pointer which provides the product-mode behavior.
+ // Whatever the product-mode version of the system does to the
+ // DUI pointer is done to this pointer. All other fields in
+ // this class are used only for assertion checking.
+ Node** _outp;
+
+ #ifdef ASSERT
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_limit();
+ void verify_resync();
+ void verify_relimit(uint n);
+ void reset(const DUIterator_Fast& that);
+ #endif
+
+ // Note: offset must be signed, since -1 is sometimes passed
+ DUIterator_Fast(const Node* node, ptrdiff_t offset)
+ { _outp = node->_out + offset; debug_only(sample(node)); }
+
+ public:
+ // initialize to garbage; clear _vdui to disable asserts
+ DUIterator_Fast()
+ { /*initialize to garbage*/ debug_only(_vdui = false); }
+
+ void operator++(int dummy_to_specify_postfix_op)
+ { _outp++; VDUI_ONLY(verify(_node, true)); }
+
+ void operator--()
+ { VDUI_ONLY(verify_resync()); --_outp; }
+
+ void operator-=(uint n) // applied to the limit only
+ { _outp -= n; VDUI_ONLY(verify_relimit(n)); }
+
+ bool operator<(DUIterator_Fast& limit) {
+ I_VDUI_ONLY(*this, this->verify(_node, true));
+ I_VDUI_ONLY(limit, limit.verify_limit());
+ return _outp < limit._outp;
+ }
+
+ void operator=(const DUIterator_Fast& that)
+ { _outp = that._outp; debug_only(reset(that)); }
+};
+
+DUIterator_Fast Node::fast_outs(DUIterator_Fast& imax) const {
+ // Assign a limit pointer to the reference argument:
+ imax = DUIterator_Fast(this, (ptrdiff_t)_outcnt);
+ // Return the base pointer:
+ return DUIterator_Fast(this, 0);
+}
+Node* Node::fast_out(DUIterator_Fast& i) const {
+ I_VDUI_ONLY(i, i.verify(this));
+ return debug_only(i._last=) *i._outp;
+}
+
+
+// Faster DU iterator. Requires each successive edge to be removed.
+// Does not allow insertion of any edges.
+// Usage:
+// for (DUIterator_Last imin, i = x->last_outs(imin); i >= imin; i -= num_edges) {
+// Node* y = x->last_out(i);
+// ...
+// }
+// Compiles in product mode to raw Node** pointer arithmetic, with
+// no reloading of pointers from the original node x.
+class DUIterator_Last : private DUIterator_Fast {
+ friend class Node;
+
+ #ifdef ASSERT
+ void verify(const Node* node, bool at_end_ok = false);
+ void verify_limit();
+ void verify_step(uint num_edges);
+ #endif
+
+ // Note: offset must be signed, since -1 is sometimes passed
+ DUIterator_Last(const Node* node, ptrdiff_t offset)
+ : DUIterator_Fast(node, offset) { }
+
+ void operator++(int dummy_to_specify_postfix_op) {} // do not use
+ void operator<(int) {} // do not use
+
+ public:
+ DUIterator_Last() { }
+ // initialize to garbage
+
+ void operator--()
+ { _outp--; VDUI_ONLY(verify_step(1)); }
+
+ void operator-=(uint n)
+ { _outp -= n; VDUI_ONLY(verify_step(n)); }
+
+ bool operator>=(DUIterator_Last& limit) {
+ I_VDUI_ONLY(*this, this->verify(_node, true));
+ I_VDUI_ONLY(limit, limit.verify_limit());
+ return _outp >= limit._outp;
+ }
+
+ void operator=(const DUIterator_Last& that)
+ { DUIterator_Fast::operator=(that); }
+};
+
+DUIterator_Last Node::last_outs(DUIterator_Last& imin) const {
+ // Assign a limit pointer to the reference argument:
+ imin = DUIterator_Last(this, 0);
+ // Return the initial pointer:
+ return DUIterator_Last(this, (ptrdiff_t)_outcnt - 1);
+}
+Node* Node::last_out(DUIterator_Last& i) const {
+ I_VDUI_ONLY(i, i.verify(this));
+ return debug_only(i._last=) *i._outp;
+}
+
+#endif //OPTO_DU_ITERATOR_ASSERT
+
+#undef I_VDUI_ONLY
+#undef VDUI_ONLY
+
+
+//-----------------------------------------------------------------------------
+// Map dense integer indices to Nodes. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Node*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+class Node_Array : public ResourceObj {
+protected:
+ Arena *_a; // Arena to allocate in
+ uint _max;
+ Node **_nodes;
+ void grow( uint i ); // Grow array node to fit
+public:
+ Node_Array(Arena *a) : _a(a), _max(OptoNodeListSize) {
+ _nodes = NEW_ARENA_ARRAY( a, Node *, OptoNodeListSize );
+ for( int i = 0; i < OptoNodeListSize; i++ ) {
+ _nodes[i] = NULL;
+ }
+ }
+
+ Node_Array(Node_Array *na) : _a(na->_a), _max(na->_max), _nodes(na->_nodes) {}
+ Node *operator[] ( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<_max) ? _nodes[i] : (Node*)NULL; }
+ Node *at( uint i ) const { assert(i<_max,"oob"); return _nodes[i]; }
+ Node **adr() { return _nodes; }
+ // Extend the mapping: index i maps to Node *n.
+ void map( uint i, Node *n ) { if( i>=_max ) grow(i); _nodes[i] = n; }
+ void insert( uint i, Node *n );
+ void remove( uint i ); // Remove, preserving order
+ void sort( C_sort_func_t func);
+ void reset( Arena *new_a ); // Zap mapping to empty; reclaim storage
+ void clear(); // Set all entries to NULL, keep storage
+ uint Size() const { return _max; }
+ void dump() const;
+};
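+
+// A minimal usage sketch (illustrative only; 'n' is a placeholder node):
+// stores beyond the current capacity silently grow the backing array, and
+// reads of never-mapped indices return NULL.
+//
+//   Node_Array map(Thread::current()->resource_area());
+//   map.map(n->_idx, n);         // grows if n->_idx >= current max
+//   Node* same = map[n->_idx];   // == n
+//   Node* none = map[12345];     // NULL unless something was mapped there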
+
+class Node_List : public Node_Array {
+ uint _cnt;
+public:
+ Node_List() : Node_Array(Thread::current()->resource_area()), _cnt(0) {}
+ Node_List(Arena *a) : Node_Array(a), _cnt(0) {}
+ void insert( uint i, Node *n ) { Node_Array::insert(i,n); _cnt++; }
+ void remove( uint i ) { Node_Array::remove(i); _cnt--; }
+ void push( Node *b ) { map(_cnt++,b); }
+ void yank( Node *n ); // Find and remove
+ Node *pop() { return _nodes[--_cnt]; }
+ Node *rpop() { Node *b = _nodes[0]; _nodes[0]=_nodes[--_cnt]; return b;}
+ void clear() { _cnt = 0; Node_Array::clear(); } // retain storage
+ uint size() const { return _cnt; }
+ void dump() const;
+};
+
+//------------------------------Unique_Node_List-------------------------------
+class Unique_Node_List : public Node_List {
+ VectorSet _in_worklist;
+ uint _clock_index; // Index in list where to pop from next
+public:
+ Unique_Node_List() : Node_List(), _in_worklist(Thread::current()->resource_area()), _clock_index(0) {}
+ Unique_Node_List(Arena *a) : Node_List(a), _in_worklist(a), _clock_index(0) {}
+
+ void remove( Node *n );
+ bool member( Node *n ) { return _in_worklist.test(n->_idx) != 0; }
+ VectorSet &member_set(){ return _in_worklist; }
+
+ void push( Node *b ) {
+ if( !_in_worklist.test_set(b->_idx) )
+ Node_List::push(b);
+ }
+ Node *pop() {
+ if( _clock_index >= size() ) _clock_index = 0;
+ Node *b = at(_clock_index);
+ map( _clock_index++, Node_List::pop());
+ _in_worklist >>= b->_idx;
+ return b;
+ }
+ Node *remove( uint i ) {
+ Node *b = Node_List::at(i);
+ _in_worklist >>= b->_idx;
+ map(i,Node_List::pop());
+ return b;
+ }
+ void yank( Node *n ) { _in_worklist >>= n->_idx; Node_List::yank(n); }
+ void clear() {
+ _in_worklist.Clear(); // Discards storage but grows automatically
+ Node_List::clear();
+ _clock_index = 0;
+ }
+
+ // Used after parsing to remove useless nodes before Iterative GVN
+ void remove_useless_nodes(VectorSet &useful);
+
+#ifndef PRODUCT
+ void print_set() const { _in_worklist.print(); }
+#endif
+};
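+
+// A minimal worklist sketch (illustrative only; 'root' and visit() are
+// placeholders). The VectorSet filters duplicate pushes, and pop() cycles
+// through the list via _clock_index instead of always taking the tail.
+//
+//   Unique_Node_List worklist;
+//   worklist.push(root);
+//   while (worklist.size() > 0) {
+//     Node* n = worklist.pop();
+//     visit(n);
+//     for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++)
+//       worklist.push(n->fast_out(i));  // nodes already queued are not re-added
+//   }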
+
+// Inline definition of Compile::record_for_igvn must be deferred to this point.
+inline void Compile::record_for_igvn(Node* n) {
+ _for_igvn->push(n);
+ record_for_escape_analysis(n);
+}
+
+//------------------------------Node_Stack-------------------------------------
+class Node_Stack {
+protected:
+ struct INode {
+ Node *node; // Processed node
+ uint indx; // Index of next node's child
+ };
+ INode *_inode_top; // tos, stack grows up
+ INode *_inode_max; // End of _inodes == _inodes + _max
+ INode *_inodes; // Array storage for the stack
+ Arena *_a; // Arena to allocate in
+ void grow();
+public:
+ Node_Stack(int size) {
+ size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
+ _a = Thread::current()->resource_area();
+ _inodes = NEW_ARENA_ARRAY( _a, INode, max );
+ _inode_max = _inodes + max;
+ _inode_top = _inodes - 1; // stack is empty
+ }
+
+ Node_Stack(Arena *a, int size) : _a(a) {
+ size_t max = (size > OptoNodeListSize) ? size : OptoNodeListSize;
+ _inodes = NEW_ARENA_ARRAY( _a, INode, max );
+ _inode_max = _inodes + max;
+ _inode_top = _inodes - 1; // stack is empty
+ }
+
+ void pop() {
+ assert(_inode_top >= _inodes, "node stack underflow");
+ --_inode_top;
+ }
+ void push(Node *n, uint i) {
+ ++_inode_top;
+ if (_inode_top >= _inode_max) grow();
+ INode *top = _inode_top; // optimization
+ top->node = n;
+ top->indx = i;
+ }
+ Node *node() const {
+ return _inode_top->node;
+ }
+ Node* node_at(uint i) const {
+ assert(_inodes + i <= _inode_top, "in range");
+ return _inodes[i].node;
+ }
+ uint index() const {
+ return _inode_top->indx;
+ }
+ void set_node(Node *n) {
+ _inode_top->node = n;
+ }
+ void set_index(uint i) {
+ _inode_top->indx = i;
+ }
+ uint size_max() const { return (uint)pointer_delta(_inode_max, _inodes, sizeof(INode)); } // Max size
+ uint size() const { return (uint)pointer_delta(_inode_top, _inodes, sizeof(INode)) + 1; } // Current size
+ bool is_nonempty() const { return (_inode_top >= _inodes); }
+ bool is_empty() const { return (_inode_top < _inodes); }
+ void clear() { _inode_top = _inodes - 1; } // retain storage
+};
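+
+// A minimal sketch of the usual iterative-walk pattern (illustrative only;
+// 'root' is a placeholder): each stack entry pairs a node with the index of
+// its next input to visit, so a depth-first traversal needs no recursion.
+//
+//   Node_Stack stack(16);
+//   stack.push(root, 0);
+//   while (stack.is_nonempty()) {
+//     Node* n = stack.node();
+//     uint  i = stack.index();
+//     if (i < n->req()) {
+//       stack.set_index(i + 1);
+//       Node* m = n->in(i);
+//       if (m != NULL)  stack.push(m, 0);  // descend into the i-th input
+//     } else {
+//       stack.pop();                       // all inputs of n have been visited
+//     }
+//   }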
+
+
+//-----------------------------Node_Notes--------------------------------------
+// Debugging or profiling annotations loosely and sparsely associated
+// with some nodes. See Compile::node_notes_at for the accessor.
+class Node_Notes VALUE_OBJ_CLASS_SPEC {
+ JVMState* _jvms;
+
+public:
+ Node_Notes(JVMState* jvms = NULL) {
+ _jvms = jvms;
+ }
+
+ JVMState* jvms() { return _jvms; }
+ void set_jvms(JVMState* x) { _jvms = x; }
+
+ // True if there is nothing here.
+ bool is_clear() {
+ return (_jvms == NULL);
+ }
+
+ // Make there be nothing here.
+ void clear() {
+ _jvms = NULL;
+ }
+
+ // Make a new, clean node notes.
+ static Node_Notes* make(Compile* C) {
+ Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
+ nn->clear();
+ return nn;
+ }
+
+ Node_Notes* clone(Compile* C) {
+ Node_Notes* nn = NEW_ARENA_ARRAY(C->comp_arena(), Node_Notes, 1);
+ (*nn) = (*this);
+ return nn;
+ }
+
+ // Absorb any information from source.
+ bool update_from(Node_Notes* source) {
+ bool changed = false;
+ if (source != NULL) {
+ if (source->jvms() != NULL) {
+ set_jvms(source->jvms());
+ changed = true;
+ }
+ }
+ return changed;
+ }
+};
+
+// Inlined accessors for Compile::node_notes that require the preceding class:
+inline Node_Notes*
+Compile::locate_node_notes(GrowableArray<Node_Notes*>* arr,
+ int idx, bool can_grow) {
+ assert(idx >= 0, "oob");
+ int block_idx = (idx >> _log2_node_notes_block_size);
+ int grow_by = (block_idx - (arr == NULL? 0: arr->length()));
+ if (grow_by >= 0) {
+ if (!can_grow) return NULL;
+ grow_node_notes(arr, grow_by + 1);
+ }
+ // (Every element of arr is a sub-array of length _node_notes_block_size.)
+ return arr->at(block_idx) + (idx & (_node_notes_block_size-1));
+}
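+// (For example, if the block size were 256, so _log2_node_notes_block_size == 8,
+// then idx 1000 would live in sub-array arr->at(3) at offset 1000 & 255 == 232.
+// The actual block size is defined in compile.hpp; 256 is only illustrative.)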
+
+inline bool
+Compile::set_node_notes_at(int idx, Node_Notes* value) {
+ if (value == NULL || value->is_clear())
+ return false; // nothing to write => write nothing
+ Node_Notes* loc = locate_node_notes(_node_note_array, idx, true);
+ assert(loc != NULL, "");
+ return loc->update_from(value);
+}
+
+
+//------------------------------TypeNode---------------------------------------
+// Node with a Type constant.
+class TypeNode : public Node {
+protected:
+ virtual uint hash() const; // Check the type
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const; // Size is bigger
+ const Type* const _type;
+public:
+ void set_type(const Type* t) {
+ assert(t != NULL, "sanity");
+ debug_only(uint check_hash = (VerifyHashTableKeys && _hash_lock) ? hash() : NO_HASH);
+ *(const Type**)&_type = t; // cast away const-ness
+ // If this node is in the hash table, make sure it doesn't need a rehash.
+ assert(check_hash == NO_HASH || check_hash == hash(), "type change must preserve hash code");
+ }
+ const Type* type() const { assert(_type != NULL, "sanity"); return _type; };
+ TypeNode( const Type *t, uint required ) : Node(required), _type(t) {
+ init_class_id(Class_Type);
+ }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual uint ideal_reg() const;
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
diff --git a/src/share/vm/opto/opcodes.cpp b/src/share/vm/opto/opcodes.cpp
new file mode 100644
index 000000000..533cff06c
--- /dev/null
+++ b/src/share/vm/opto/opcodes.cpp
@@ -0,0 +1,42 @@
+/*
+ * Copyright 1998-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// ----------------------------------------------------------------------------
+// Build a table of class names as strings. Used both for debugging printouts
+// and in the ADL machine descriptions.
+#define macro(x) #x,
+const char *NodeClassNames[] = {
+ "Node",
+ "Set",
+ "RegI",
+ "RegP",
+ "RegF",
+ "RegD",
+ "RegL",
+ "RegFlags",
+ "_last_machine_leaf",
+#include "classes.hpp"
+ "_last_class_name",
+};
+#undef macro
diff --git a/src/share/vm/opto/opcodes.hpp b/src/share/vm/opto/opcodes.hpp
new file mode 100644
index 000000000..7c3e38a15
--- /dev/null
+++ b/src/share/vm/opto/opcodes.hpp
@@ -0,0 +1,43 @@
+/*
+ * Copyright 1997-2003 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Build a big enum of class names to give them dense integer indices
+#define macro(x) Op_##x,
+enum Opcodes {
+ Op_Node = 0,
+ macro(Set) // Instruction selection match rule
+ macro(RegI) // Machine integer register
+ macro(RegP) // Machine pointer register
+ macro(RegF) // Machine float register
+ macro(RegD) // Machine double register
+ macro(RegL) // Machine long register
+ macro(RegFlags) // Machine flags register
+ _last_machine_leaf, // Split between regular opcodes and machine
+#include "classes.hpp"
+ _last_opcode
+};
+#undef macro
+
+// Table of names, indexed by Opcode
+extern const char *NodeClassNames[];
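+
+// For example (illustrative only), the dense opcode of a node indexes
+// directly into this table:
+//
+//   const char* name = NodeClassNames[n->Opcode()];  // e.g. "AddI" for an AddINode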
diff --git a/src/share/vm/opto/optoreg.hpp b/src/share/vm/opto/optoreg.hpp
new file mode 100644
index 000000000..68a2df2cd
--- /dev/null
+++ b/src/share/vm/opto/optoreg.hpp
@@ -0,0 +1,194 @@
+/*
+ * Copyright 2006-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------OptoReg----------------------------------------
+// We eventually need Registers for the Real World. Registers are essentially
+// non-SSA names. A Register is represented as a number. Non-regular values
+// (e.g., Control, Memory, I/O) use the Special register. The actual machine
+// registers (as described in the ADL file for a machine) start at zero.
+// Stack-slots (spill locations) start at the next Chunk past the last machine
+// register.
+//
+// Note that stack spill-slots are treated as a very large register set.
+// They have all the correct properties for a Register: not aliased (uniquely
+// named). There is some simple mapping from a stack-slot register number
+// to the actual location on the stack; this mapping depends on the calling
+// conventions and is described in the ADL.
+//
+// Note that Name is not an enum. The C++ standard defines the range of an
+// enum as the range of the smallest bit-field that can represent all of the
+// enumerators declared in it. The result of assigning a value to an enum is
+// undefined if the value is outside that valid range. OptoReg::Name is
+// therefore typedef'ed as int, because it needs to be able to represent spill-slots.
+//
+class OptoReg VALUE_OBJ_CLASS_SPEC {
+
+ friend class C2Compiler;
+ public:
+ typedef int Name;
+ enum {
+ // Chunk 0
+ Physical = AdlcVMDeps::Physical, // Start of physical regs
+ // A few oddballs at the edge of the world
+ Special = -2, // All special (not allocated) values
+ Bad = -1 // Not a register
+ };
+
+ private:
+
+ static const VMReg opto2vm[REG_COUNT];
+ static Name vm2opto[ConcreteRegisterImpl::number_of_registers];
+
+ public:
+
+ // Stack pointer register
+ static OptoReg::Name c_frame_pointer;
+
+
+
+ // Increment a register number. As in:
+ // "for ( OptoReg::Name i; i=Control; i = add(i,1) ) ..."
+ static Name add( Name x, int y ) { return Name(x+y); }
+
+ // (We would like to have an operator+ for RegName, but it is not
+ // a class, so this would be illegal in C++.)
+
+ static void dump( int );
+
+ // Get the stack slot number of an OptoReg::Name
+ static unsigned int reg2stack( OptoReg::Name r) {
+ assert( r >= stack0(), " must be");
+ return r - stack0();
+ }
+
+ // convert a stack slot number into an OptoReg::Name
+ static OptoReg::Name stack2reg( int idx) {
+ return Name(stack0() + idx);
+ }
+
+ static bool is_stack(Name n) {
+ return n >= stack0();
+ }
+
+ static bool is_valid(Name n) {
+ return (n != Bad);
+ }
+
+ static bool is_reg(Name n) {
+ return is_valid(n) && !is_stack(n);
+ }
+
+ static VMReg as_VMReg(OptoReg::Name n) {
+ if (is_reg(n)) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return opto2vm[n];
+ } else {
+ assert(!is_stack(n), "must un warp");
+ return VMRegImpl::Bad();
+ }
+ }
+
+ // Can un-warp a stack slot or convert a register or Bad
+ static VMReg as_VMReg(OptoReg::Name n, int frame_size, int arg_count) {
+ if (is_reg(n)) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return opto2vm[n];
+ } else if (is_stack(n)) {
+ int stack_slot = reg2stack(n);
+ if (stack_slot < arg_count) {
+ return VMRegImpl::stack2reg(stack_slot + frame_size);
+ }
+ return VMRegImpl::stack2reg(stack_slot - arg_count);
+ // return return VMRegImpl::stack2reg(reg2stack(OptoReg::add(n, -arg_count)));
+ } else {
+ return VMRegImpl::Bad();
+ }
+ }
+
+ static OptoReg::Name as_OptoReg(VMReg r) {
+ if (r->is_stack()) {
+ assert(false, "must warp");
+ return stack2reg(r->reg2stack());
+ } else if (r->is_valid()) {
+ // Must use table, it'd be nice if Bad was indexable...
+ return vm2opto[r->value()];
+ } else {
+ return Bad;
+ }
+ }
+
+ static OptoReg::Name stack0() {
+ return VMRegImpl::stack0->value();
+ }
+
+ static const char* regname(OptoReg::Name n) {
+ return as_VMReg(n)->name();
+ }
+
+};
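+
+// A minimal sketch of the numbering scheme (illustrative only): machine
+// registers lie below stack0() and spill slots at or above it, so the two
+// conversions below are inverses on the stack-slot range.
+//
+//   OptoReg::Name slot5 = OptoReg::stack2reg(5);        // fifth spill slot
+//   assert(OptoReg::is_stack(slot5), "past the last machine register");
+//   assert(OptoReg::reg2stack(slot5) == 5, "round trip");
+//   assert(!OptoReg::is_reg(slot5) && OptoReg::is_valid(slot5), "");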
+
+//---------------------------OptoRegPair-------------------------------------------
+// Pairs of 32-bit registers for the allocator.
+// This class is very similar to VMRegPair. C2 only interfaces with VMRegPair
+// via the calling convention code, which is shared between the compilers.
+// Since C2 uses OptoRegs for register allocation, it is more efficient to use
+// OptoRegPair internally for nodes that can contain a pair of OptoRegs rather
+// than use VMRegPair and continually convert back and forth. So normally
+// C2 takes in a VMRegPair from the calling convention code, immediately
+// converts it to an OptoRegPair, and stays in the OptoReg world. The only
+// conversion between OptoRegs and VMRegs is for debug info and oopMaps. This
+// is not a high-bandwidth spot and so it is not an issue.
+// Note that one other consequence of staying in the OptoReg world with OptoRegPairs
+// is that there are "physical" OptoRegs that are not representable in the VMReg
+// world, notably flags. [ But by design there is "space" in the VMReg world
+// for such registers; they just may not be concrete. ] So if we were to use VMRegPair,
+// then the VMReg world would have to have a representation for these registers
+// so that OptoReg->VMReg->OptoReg would reproduce the original OptoReg. As it
+// stands, if you convert a flag (condition code) to a VMReg you will get VMRegImpl::Bad,
+// and converting that back will return OptoReg::Bad, losing the identity of the OptoReg.
+
+class OptoRegPair {
+private:
+ short _second;
+ short _first;
+public:
+ void set_bad ( ) { _second = OptoReg::Bad; _first = OptoReg::Bad; }
+ void set1 ( OptoReg::Name n ) { _second = OptoReg::Bad; _first = n; }
+ void set2 ( OptoReg::Name n ) { _second = n + 1; _first = n; }
+ void set_pair( OptoReg::Name second, OptoReg::Name first ) { _second= second; _first= first; }
+ void set_ptr ( OptoReg::Name ptr ) {
+#ifdef _LP64
+ _second = ptr+1;
+#else
+ _second = OptoReg::Bad;
+#endif
+ _first = ptr;
+ }
+
+ OptoReg::Name second() const { return _second; }
+ OptoReg::Name first() const { return _first; }
+ OptoRegPair(OptoReg::Name second, OptoReg::Name first) { _second = second; _first = first; }
+ OptoRegPair(OptoReg::Name f) { _second = OptoReg::Bad; _first = f; }
+ OptoRegPair() { _second = OptoReg::Bad; _first = OptoReg::Bad; }
+};
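+
+// A minimal usage sketch (illustrative only; 'lo' is a placeholder register
+// name): set1() records a single 32-bit value, set2() a 64-bit value held in
+// two adjacent registers, and set_ptr() chooses between them based on _LP64.
+//
+//   OptoRegPair p;
+//   p.set2(lo);                  // first() == lo, second() == lo + 1
+//   assert(p.second() == p.first() + 1, "adjacent halves");
+//   p.set1(lo);                  // second() == OptoReg::Bad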
diff --git a/src/share/vm/opto/output.cpp b/src/share/vm/opto/output.cpp
new file mode 100644
index 000000000..7868ec237
--- /dev/null
+++ b/src/share/vm/opto/output.cpp
@@ -0,0 +1,2680 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_output.cpp.incl"
+
+extern uint size_java_to_interp();
+extern uint reloc_java_to_interp();
+extern uint size_exception_handler();
+extern uint size_deopt_handler();
+
+#ifndef PRODUCT
+#define DEBUG_ARG(x) , x
+#else
+#define DEBUG_ARG(x)
+#endif
+
+extern int emit_exception_handler(CodeBuffer &cbuf);
+extern int emit_deopt_handler(CodeBuffer &cbuf);
+
+//------------------------------Output-----------------------------------------
+// Convert Nodes to instruction bits and pass off to the VM
+void Compile::Output() {
+ // RootNode goes
+ assert( _cfg->_broot->_nodes.size() == 0, "" );
+
+ // Initialize the space for the BufferBlob used to find and verify
+ // instruction size in MachNode::emit_size()
+ init_scratch_buffer_blob();
+
+ // Make sure I can find the Start Node
+ Block_Array& bbs = _cfg->_bbs;
+ Block *entry = _cfg->_blocks[1];
+ Block *broot = _cfg->_broot;
+
+ const StartNode *start = entry->_nodes[0]->as_Start();
+
+ // Replace StartNode with prolog
+ MachPrologNode *prolog = new (this) MachPrologNode();
+ entry->_nodes.map( 0, prolog );
+ bbs.map( prolog->_idx, entry );
+ bbs.map( start->_idx, NULL ); // start is no longer in any block
+
+ // Virtual methods need an unverified entry point
+
+ if( is_osr_compilation() ) {
+ if( PoisonOSREntry ) {
+ // TODO: Should use a ShouldNotReachHereNode...
+ _cfg->insert( broot, 0, new (this) MachBreakpointNode() );
+ }
+ } else {
+ if( _method && !_method->flags().is_static() ) {
+ // Insert unvalidated entry point
+ _cfg->insert( broot, 0, new (this) MachUEPNode() );
+ }
+
+ }
+
+
+ // Break before main entry point
+ if( (_method && _method->break_at_execute())
+#ifndef PRODUCT
+ ||(OptoBreakpoint && is_method_compilation())
+ ||(OptoBreakpointOSR && is_osr_compilation())
+ ||(OptoBreakpointC2R && !_method)
+#endif
+ ) {
+ // checking for _method means that OptoBreakpoint does not apply to
+ // runtime stubs or frame converters
+ _cfg->insert( entry, 1, new (this) MachBreakpointNode() );
+ }
+
+ // Insert epilogs before every return
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ if( !b->is_connector() && b->non_connector_successor(0) == _cfg->_broot ) { // Found a program exit point?
+ Node *m = b->end();
+ if( m->is_Mach() && m->as_Mach()->ideal_Opcode() != Op_Halt ) {
+ MachEpilogNode *epilog = new (this) MachEpilogNode(m->as_Mach()->ideal_Opcode() == Op_Return);
+ b->add_inst( epilog );
+ bbs.map(epilog->_idx, b);
+ //_regalloc->set_bad(epilog->_idx); // Already initialized this way.
+ }
+ }
+ }
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ if ( ZapDeadCompiledLocals ) Insert_zap_nodes();
+# endif
+
+ ScheduleAndBundle();
+
+#ifndef PRODUCT
+ if (trace_opto_output()) {
+ tty->print("\n---- After ScheduleAndBundle ----\n");
+ for (uint i = 0; i < _cfg->_num_blocks; i++) {
+ tty->print("\nBB#%03d:\n", i);
+ Block *bb = _cfg->_blocks[i];
+ for (uint j = 0; j < bb->_nodes.size(); j++) {
+ Node *n = bb->_nodes[j];
+ OptoReg::Name reg = _regalloc->get_reg_first(n);
+ tty->print(" %-6s ", reg >= 0 && reg < REG_COUNT ? Matcher::regName[reg] : "");
+ n->dump();
+ }
+ }
+ }
+#endif
+
+ if (failing()) return;
+
+ BuildOopMaps();
+
+ if (failing()) return;
+
+ Fill_buffer();
+}
+
+bool Compile::need_stack_bang(int frame_size_in_bytes) const {
+ // Determine if we need to generate a stack overflow check.
+ // Do it if the method is not a stub function and
+ // has java calls or has frame size > vm_page_size/8.
+ return (stub_function() == NULL &&
+ (has_java_calls() || frame_size_in_bytes > os::vm_page_size()>>3));
+}
+
+bool Compile::need_register_stack_bang() const {
+ // Determine if we need to generate a register stack overflow check.
+ // This is only used on architectures which have split register
+ // and memory stacks (i.e., IA64).
+ // Bang if the method is not a stub function and has java calls
+ return (stub_function() == NULL && has_java_calls());
+}
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+
+
+// In order to catch compiler oop-map bugs, we have implemented
+// a debugging mode called ZapDeadCompiledLocals.
+// This mode causes the compiler to insert a call to a runtime routine,
+// "zap_dead_locals", right before each place in compiled code
+// that could potentially be a gc-point (i.e., a safepoint or oop map point).
+// The runtime routine checks that locations mapped as oops are really
+// oops, that locations mapped as values do not look like oops,
+// and that locations mapped as dead are not used later
+// (by zapping them to an invalid address).
+
+int Compile::_CompiledZap_count = 0;
+
+void Compile::Insert_zap_nodes() {
+ bool skip = false;
+
+
+ // Dink with static counts because code without the extra
+ // runtime calls is MUCH faster for debugging purposes
+
+ if ( CompileZapFirst == 0 ) ; // nothing special
+ else if ( CompileZapFirst > CompiledZap_count() ) skip = true;
+ else if ( CompileZapFirst == CompiledZap_count() )
+ warning("starting zap compilation after skipping");
+
+ if ( CompileZapLast == -1 ) ; // nothing special
+ else if ( CompileZapLast < CompiledZap_count() ) skip = true;
+ else if ( CompileZapLast == CompiledZap_count() )
+ warning("about to compile last zap");
+
+ ++_CompiledZap_count; // counts skipped zaps, too
+
+ if ( skip ) return;
+
+
+ if ( _method == NULL )
+ return; // no safepoints/oopmaps emitted for calls in stubs, so we don't care
+
+ // Insert call to zap runtime stub before every node with an oop map
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ for ( uint j = 0; j < b->_nodes.size(); ++j ) {
+ Node *n = b->_nodes[j];
+
+ // Determine whether we should insert a zap-a-lot node in the output.
+ // We do that for all nodes that have oopmap info, except for calls
+ // to allocation. Allocation calls pass in the old top-of-eden pointer
+ // and expect the C code to reset it. Hence, there can be no safepoints between
+ // the inlined allocation and the call to new_Java, etc.
+ // We also cannot zap monitor calls, as they must hold the microlock
+ // during the call to Zap, which also wants to grab the microlock.
+ bool insert = n->is_MachSafePoint() && (n->as_MachSafePoint()->oop_map() != NULL);
+ if ( insert ) { // it is MachSafePoint
+ if ( !n->is_MachCall() ) {
+ insert = false;
+ } else if ( n->is_MachCall() ) {
+ MachCallNode* call = n->as_MachCall();
+ if (call->entry_point() == OptoRuntime::new_instance_Java() ||
+ call->entry_point() == OptoRuntime::new_array_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray2_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray3_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray4_Java() ||
+ call->entry_point() == OptoRuntime::multianewarray5_Java() ||
+ call->entry_point() == OptoRuntime::slow_arraycopy_Java() ||
+ call->entry_point() == OptoRuntime::complete_monitor_locking_Java()
+ ) {
+ insert = false;
+ }
+ }
+ if (insert) {
+ Node *zap = call_zap_node(n->as_MachSafePoint(), i);
+ b->_nodes.insert( j, zap );
+ _cfg->_bbs.map( zap->_idx, b );
+ ++j;
+ }
+ }
+ }
+ }
+}
+
+
+Node* Compile::call_zap_node(MachSafePointNode* node_to_check, int block_no) {
+ const TypeFunc *tf = OptoRuntime::zap_dead_locals_Type();
+ CallStaticJavaNode* ideal_node =
+ new (this, tf->domain()->cnt()) CallStaticJavaNode( tf,
+ OptoRuntime::zap_dead_locals_stub(_method->flags().is_native()),
+ "call zap dead locals stub", 0, TypePtr::BOTTOM);
+ // We need to copy the OopMap from the site we're zapping at.
+ // We have to make a copy, because the zap site might not be
+ // a call site, and zap_dead is a call site.
+ OopMap* clone = node_to_check->oop_map()->deep_copy();
+
+ // Add the cloned OopMap to the zap node
+ ideal_node->set_oop_map(clone);
+ return _matcher->match_sfpt(ideal_node);
+}
+
+//------------------------------is_node_getting_a_safepoint--------------------
+bool Compile::is_node_getting_a_safepoint( Node* n) {
+ // This code duplicates the logic prior to the call of add_safepoint
+ // below in this file.
+ if( n->is_MachSafePoint() ) return true;
+ return false;
+}
+
+# endif // ENABLE_ZAP_DEAD_LOCALS
+
+//------------------------------compute_loop_first_inst_sizes------------------
+// Compute the size of the first NumberOfLoopInstrToAlign instructions at the
+// head of a loop. When aligning a loop we need to provide enough instructions
+// in the cpu's fetch buffer to feed the decoders. The loop alignment can be
+// avoided if there are already enough instructions in the fetch buffer at the head of the loop.
+// By default, the size is set to 999999 by Block's constructor so that
+// a loop will be aligned if the size is not reset here.
+//
+// Note: Mach instructions could contain several HW instructions
+// so the size is estimated only.
+//
+void Compile::compute_loop_first_inst_sizes() {
+ // The next condition is used to gate the loop alignment optimization.
+ // Don't align a loop if there are enough instructions at the head of the loop
+ // or the alignment padding is larger than MaxLoopPad. By default, MaxLoopPad
+ // is equal to OptoLoopAlignment-1 except on new Intel cpus, where it is
+ // equal to 11 bytes, which is the size of the largest address NOP instruction.
+ if( MaxLoopPad < OptoLoopAlignment-1 ) {
+ uint last_block = _cfg->_num_blocks-1;
+ for( uint i=1; i <= last_block; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ // Check the first loop's block which requires an alignment.
+ if( b->head()->is_Loop() &&
+ b->code_alignment() > (uint)relocInfo::addr_unit() ) {
+ uint sum_size = 0;
+ uint inst_cnt = NumberOfLoopInstrToAlign;
+ inst_cnt = b->compute_first_inst_size(sum_size, inst_cnt,
+ _regalloc);
+ // Check the next fallthrough block if first loop's block does not have
+ // enough instructions.
+ if( inst_cnt > 0 && i < last_block ) {
+ // First, check if the first loop's block contains whole loop.
+ // LoopNode::LoopBackControl == 2.
+ Block *bx = _cfg->_bbs[b->pred(2)->_idx];
+ // Skip connector blocks (with limit in case of irreducible loops).
+ int search_limit = 16;
+ while( bx->is_connector() && search_limit-- > 0) {
+ bx = _cfg->_bbs[bx->pred(1)->_idx];
+ }
+ if( bx != b ) { // loop body is in several blocks.
+ Block *nb = NULL;
+ while( inst_cnt > 0 && i < last_block && nb != bx &&
+ !_cfg->_blocks[i+1]->head()->is_Loop() ) {
+ i++;
+ nb = _cfg->_blocks[i];
+ inst_cnt = nb->compute_first_inst_size(sum_size, inst_cnt,
+ _regalloc);
+ } // while( inst_cnt > 0 && i < last_block )
+ } // if( bx != b )
+ } // if( inst_cnt > 0 && i < last_block )
+ b->set_first_inst_size(sum_size);
+ } // if( b->head()->is_Loop() )
+ } // for( i <= last_block )
+ } // if( MaxLoopPad < OptoLoopAlignment-1 )
+}
+
+//----------------------Shorten_branches---------------------------------------
+// The architecture description provides short branch variants for some long
+// branch instructions. Replace eligible long branches with short branches.
+void Compile::Shorten_branches(Label *labels, int& code_size, int& reloc_size, int& stub_size, int& const_size) {
+
+ // fill in the nop array for bundling computations
+ MachNode *_nop_list[Bundle::_nop_count];
+ Bundle::initialize_nops(_nop_list, this);
+
+ // ------------------
+ // Compute size of each block, method size, and relocation information size
+ uint *jmp_end = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks);
+ uint *blk_starts = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks+1);
+ DEBUG_ONLY( uint *jmp_target = NEW_RESOURCE_ARRAY(uint,_cfg->_num_blocks); )
+ blk_starts[0] = 0;
+
+ // Initialize the sizes to 0
+ code_size = 0; // Size in bytes of generated code
+ stub_size = 0; // Size in bytes of all stub entries
+ // Size in bytes of all relocation entries, including those in local stubs.
+ // Start with 2-bytes of reloc info for the unvalidated entry point
+ reloc_size = 1; // Number of relocation entries
+ const_size = 0; // size of fp constants in words
+
+ // Make three passes. The first computes pessimistic blk_starts,
+ // relative jmp_end, reloc_size and const_size information.
+ // The second performs short branch substitution using the pessimistic
+ // sizing. The third inserts nops where needed.
+
+ Node *nj; // tmp
+
+ // Step one, perform a pessimistic sizing pass.
+ uint i;
+ uint min_offset_from_last_call = 1; // init to a positive value
+ uint nop_size = (new (this) MachNopNode())->size(_regalloc);
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ Block *b = _cfg->_blocks[i];
+
+ // Sum all instruction sizes to compute block size
+ uint last_inst = b->_nodes.size();
+ uint blk_size = 0;
+ for( uint j = 0; j<last_inst; j++ ) {
+ nj = b->_nodes[j];
+ uint inst_size = nj->size(_regalloc);
+ blk_size += inst_size;
+ // Handle machine instruction nodes
+ if( nj->is_Mach() ) {
+ MachNode *mach = nj->as_Mach();
+ blk_size += (mach->alignment_required() - 1) * relocInfo::addr_unit(); // assume worst case padding
+ reloc_size += mach->reloc();
+ const_size += mach->const_size();
+ if( mach->is_MachCall() ) {
+ MachCallNode *mcall = mach->as_MachCall();
+ // This destination address is NOT PC-relative
+
+ mcall->method_set((intptr_t)mcall->entry_point());
+
+ if( mcall->is_MachCallJava() && mcall->as_MachCallJava()->_method ) {
+ stub_size += size_java_to_interp();
+ reloc_size += reloc_java_to_interp();
+ }
+ } else if (mach->is_MachSafePoint()) {
+ // If call/safepoint are adjacent, account for possible
+ // nop to disambiguate the two safepoints.
+ if (min_offset_from_last_call == 0) {
+ blk_size += nop_size;
+ }
+ }
+ }
+ min_offset_from_last_call += inst_size;
+ // Remember end of call offset
+ if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
+ min_offset_from_last_call = 0;
+ }
+ }
+
+ // During short branch replacement, we store the relative (to blk_starts)
+ // end of jump in jmp_end, rather than the absolute end of jump. This
+ // is so that we do not need to recompute sizes of all nodes when we compute
+ // correct blk_starts in our next sizing pass.
+ jmp_end[i] = blk_size;
+ DEBUG_ONLY( jmp_target[i] = 0; )
+
+ // When the next block starts a loop, we may insert pad NOP
+ // instructions. Since we cannot know our future alignment,
+ // assume the worst.
+ if( i<_cfg->_num_blocks-1 ) {
+ Block *nb = _cfg->_blocks[i+1];
+ int max_loop_pad = nb->code_alignment()-relocInfo::addr_unit();
+ if( max_loop_pad > 0 ) {
+ assert(is_power_of_2(max_loop_pad+relocInfo::addr_unit()), "");
+ blk_size += max_loop_pad;
+ }
+ }
+
+ // Save block size; update total method size
+ blk_starts[i+1] = blk_starts[i]+blk_size;
+ }
+
+ // Step two, replace eligible long jumps.
+
+ // Note: this will only get the long branches within short branch
+ // range. Another pass might detect more branches that became
+ // candidates because the shortening in the first pass exposed
+ // more opportunities. Unfortunately, this would require
+ // recomputing the starting and ending positions for the blocks
+ for( i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+
+ int j;
+ // Find the branch; ignore trailing NOPs.
+ for( j = b->_nodes.size()-1; j>=0; j-- ) {
+ nj = b->_nodes[j];
+ if( !nj->is_Mach() || nj->as_Mach()->ideal_Opcode() != Op_Con )
+ break;
+ }
+
+ if (j >= 0) {
+ if( nj->is_Mach() && nj->as_Mach()->may_be_short_branch() ) {
+ MachNode *mach = nj->as_Mach();
+ // This requires the TRUE branch target be in succs[0]
+ uint bnum = b->non_connector_successor(0)->_pre_order;
+ uintptr_t target = blk_starts[bnum];
+ if( mach->is_pc_relative() ) {
+ int offset = target-(blk_starts[i] + jmp_end[i]);
+ if (_matcher->is_short_branch_offset(offset)) {
+ // We've got a winner. Replace this branch.
+ MachNode *replacement = mach->short_branch_version(this);
+ b->_nodes.map(j, replacement);
+
+ // Update the jmp_end size to save time in our
+ // next pass.
+ jmp_end[i] -= (mach->size(_regalloc) - replacement->size(_regalloc));
+ DEBUG_ONLY( jmp_target[i] = bnum; );
+ }
+ } else {
+#ifndef PRODUCT
+ mach->dump(3);
+#endif
+ Unimplemented();
+ }
+ }
+ }
+ }
+
+ // Compute the size of first NumberOfLoopInstrToAlign instructions at head
+ // of a loop. It is used to determine the padding for loop alignment.
+ compute_loop_first_inst_sizes();
+
+ // Step 3, compute the offsets of all the labels
+ uint last_call_adr = max_uint;
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ // copy the offset of the beginning to the corresponding label
+ assert(labels[i].is_unused(), "cannot patch at this point");
+ labels[i].bind_loc(blk_starts[i], CodeBuffer::SECT_INSTS);
+
+ // insert padding for any instructions that need it
+ Block *b = _cfg->_blocks[i];
+ uint last_inst = b->_nodes.size();
+ uint adr = blk_starts[i];
+ for( uint j = 0; j<last_inst; j++ ) {
+ nj = b->_nodes[j];
+ if( nj->is_Mach() ) {
+ int padding = nj->as_Mach()->compute_padding(adr);
+ // If call/safepoint are adjacent insert a nop (5010568)
+ if (padding == 0 && nj->is_MachSafePoint() && !nj->is_MachCall() &&
+ adr == last_call_adr ) {
+ padding = nop_size;
+ }
+ if(padding > 0) {
+ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
+ int nops_cnt = padding / nop_size;
+ MachNode *nop = new (this) MachNopNode(nops_cnt);
+ b->_nodes.insert(j++, nop);
+ _cfg->_bbs.map( nop->_idx, b );
+ adr += padding;
+ last_inst++;
+ }
+ }
+ adr += nj->size(_regalloc);
+
+ // Remember end of call offset
+ if (nj->is_MachCall() && nj->as_MachCall()->is_safepoint_node()) {
+ last_call_adr = adr;
+ }
+ }
+
+ if ( i != _cfg->_num_blocks-1) {
+ // Get the size of the block
+ uint blk_size = adr - blk_starts[i];
+
+ // When the next block starts a loop, we may insert pad NOP
+ // instructions.
+ Block *nb = _cfg->_blocks[i+1];
+ int current_offset = blk_starts[i] + blk_size;
+ current_offset += nb->alignment_padding(current_offset);
+ // Save block size; update total method size
+ blk_starts[i+1] = current_offset;
+ }
+ }
+
+#ifdef ASSERT
+ for( i=0; i<_cfg->_num_blocks; i++ ) { // For all blocks
+ if( jmp_target[i] != 0 ) {
+ int offset = blk_starts[jmp_target[i]]-(blk_starts[i] + jmp_end[i]);
+ if (!_matcher->is_short_branch_offset(offset)) {
+ tty->print_cr("target (%d) - jmp_end(%d) = offset (%d), jmp_block B%d, target_block B%d", blk_starts[jmp_target[i]], blk_starts[i] + jmp_end[i], offset, i, jmp_target[i]);
+ }
+ assert(_matcher->is_short_branch_offset(offset), "Displacement too large for short jmp");
+ }
+ }
+#endif
+
+ // ------------------
+ // Compute size for code buffer
+ code_size = blk_starts[i-1] + jmp_end[i-1];
+
+ // Relocation records
+ reloc_size += 1; // Relo entry for exception handler
+
+ // Adjust reloc_size to number of record of relocation info
+ // Min is 2 bytes, max is probably 6 or 8, with a tax up to 25% for
+ // a relocation index.
+ // The CodeBuffer will expand the locs array if this estimate is too low.
+ reloc_size *= 10 / sizeof(relocInfo);
+
+ // Adjust const_size to number of bytes
+ const_size *= 2*jintSize; // both float and double take two words per entry
+
+}
+
+//------------------------------FillLocArray-----------------------------------
+// Create a bit of debug info and append it to the array. The mapping is from
+// Java local or expression stack to constant, register or stack-slot. For
+// doubles, insert 2 mappings and return 1 (to tell the caller that the next
+// entry has been taken care of and caller should skip it).
+static LocationValue *new_loc_value( PhaseRegAlloc *ra, OptoReg::Name regnum, Location::Type l_type ) {
+ // This should never have accepted Bad before
+ assert(OptoReg::is_valid(regnum), "location must be valid");
+ return (OptoReg::is_reg(regnum))
+ ? new LocationValue(Location::new_reg_loc(l_type, OptoReg::as_VMReg(regnum)) )
+ : new LocationValue(Location::new_stk_loc(l_type, ra->reg2offset(regnum)));
+}
+
+void Compile::FillLocArray( int idx, Node *local, GrowableArray<ScopeValue*> *array ) {
+ assert( local, "use _top instead of null" );
+ if (array->length() != idx) {
+ assert(array->length() == idx + 1, "Unexpected array count");
+ // Old functionality:
+ // return
+ // New functionality:
+ // Assert if the local is not top. In product mode let the new node
+ // override the old entry.
+ assert(local == top(), "LocArray collision");
+ if (local == top()) {
+ return;
+ }
+ array->pop();
+ }
+ const Type *t = local->bottom_type();
+
+ // Grab the register number for the local
+ OptoReg::Name regnum = _regalloc->get_reg_first(local);
+ if( OptoReg::is_valid(regnum) ) {// Got a register/stack?
+ // Record the double as two float registers.
+ // The register mask for such a value always specifies two adjacent
+ // float registers, with the lower register number even.
+ // Normally, the allocation of high and low words to these registers
+ // is irrelevant, because nearly all operations on register pairs
+ // (e.g., StoreD) treat them as a single unit.
+ // Here, we assume in addition that the words in these two registers
+ // stored "naturally" (by operations like StoreD and double stores
+ // within the interpreter) such that the lower-numbered register
+ // is written to the lower memory address. This may seem like
+ // a machine dependency, but it is not--it is a requirement on
+ // the author of the <arch>.ad file to ensure that, for every
+ // even/odd double-register pair to which a double may be allocated,
+ // the word in the even single-register is stored to the first
+ // memory word. (Note that register numbers are completely
+ // arbitrary, and are not tied to any machine-level encodings.)
+#ifdef _LP64
+ if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon ) {
+ array->append(new ConstantIntValue(0));
+ array->append(new_loc_value( _regalloc, regnum, Location::dbl ));
+ } else if ( t->base() == Type::Long ) {
+ array->append(new ConstantIntValue(0));
+ array->append(new_loc_value( _regalloc, regnum, Location::lng ));
+ } else if ( t->base() == Type::RawPtr ) {
+ // jsr/ret return address which must be restored into the full
+ // width 64-bit stack slot.
+ array->append(new_loc_value( _regalloc, regnum, Location::lng ));
+ }
+#else //_LP64
+#ifdef SPARC
+ if (t->base() == Type::Long && OptoReg::is_reg(regnum)) {
+ // For SPARC we have to swap high and low words for
+ // long values stored in a single-register (g0-g7).
+ array->append(new_loc_value( _regalloc, regnum , Location::normal ));
+ array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
+ } else
+#endif //SPARC
+ if( t->base() == Type::DoubleBot || t->base() == Type::DoubleCon || t->base() == Type::Long ) {
+ // Repack the double/long as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ array->append(new_loc_value( _regalloc, OptoReg::add(regnum,1), Location::normal ));
+ array->append(new_loc_value( _regalloc, regnum , Location::normal ));
+ }
+#endif //_LP64
+ else if( (t->base() == Type::FloatBot || t->base() == Type::FloatCon) &&
+ OptoReg::is_reg(regnum) ) {
+ array->append(new_loc_value( _regalloc, regnum, Matcher::float_in_double
+ ? Location::float_in_dbl : Location::normal ));
+ } else if( t->base() == Type::Int && OptoReg::is_reg(regnum) ) {
+ array->append(new_loc_value( _regalloc, regnum, Matcher::int_in_long
+ ? Location::int_in_long : Location::normal ));
+ } else {
+ array->append(new_loc_value( _regalloc, regnum, _regalloc->is_oop(local) ? Location::oop : Location::normal ));
+ }
+ return;
+ }
+
+ // No register. It must be constant data.
+ switch (t->base()) {
+ case Type::Half: // Second half of a double
+ ShouldNotReachHere(); // Caller should skip 2nd halves
+ break;
+ case Type::AnyPtr:
+ array->append(new ConstantOopWriteValue(NULL));
+ break;
+ case Type::AryPtr:
+ case Type::InstPtr:
+ case Type::KlassPtr: // fall through
+ array->append(new ConstantOopWriteValue(t->isa_oopptr()->const_oop()->encoding()));
+ break;
+ case Type::Int:
+ array->append(new ConstantIntValue(t->is_int()->get_con()));
+ break;
+ case Type::RawPtr:
+ // A return address (T_ADDRESS).
+ assert((intptr_t)t->is_ptr()->get_con() < (intptr_t)0x10000, "must be a valid BCI");
+#ifdef _LP64
+ // Must be restored to the full-width 64-bit stack slot.
+ array->append(new ConstantLongValue(t->is_ptr()->get_con()));
+#else
+ array->append(new ConstantIntValue(t->is_ptr()->get_con()));
+#endif
+ break;
+ case Type::FloatCon: {
+ float f = t->is_float_constant()->getf();
+ array->append(new ConstantIntValue(jint_cast(f)));
+ break;
+ }
+ case Type::DoubleCon: {
+ jdouble d = t->is_double_constant()->getd();
+#ifdef _LP64
+ array->append(new ConstantIntValue(0));
+ array->append(new ConstantDoubleValue(d));
+#else
+ // Repack the double as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ jint *dp = (jint*)&d;
+ array->append(new ConstantIntValue(dp[1]));
+ array->append(new ConstantIntValue(dp[0]));
+#endif
+ break;
+ }
+ case Type::Long: {
+ jlong d = t->is_long()->get_con();
+#ifdef _LP64
+ array->append(new ConstantIntValue(0));
+ array->append(new ConstantLongValue(d));
+#else
+ // Repack the long as two jints.
+ // The convention the interpreter uses is that the second local
+ // holds the first raw word of the native double representation.
+ // This is actually reasonable, since locals and stack arrays
+ // grow downwards in all implementations.
+ // (If, on some machine, the interpreter's Java locals or stack
+ // were to grow upwards, the embedded doubles would be word-swapped.)
+ jint *dp = (jint*)&d;
+ array->append(new ConstantIntValue(dp[1]));
+ array->append(new ConstantIntValue(dp[0]));
+#endif
+ break;
+ }
+ case Type::Top: // Add an illegal value here
+ array->append(new LocationValue(Location()));
+ break;
+ default:
+ ShouldNotReachHere();
+ break;
+ }
+}
+
+// Determine if this node starts a bundle
+bool Compile::starts_bundle(const Node *n) const {
+ return (_node_bundling_limit > n->_idx &&
+ _node_bundling_base[n->_idx].starts_bundle());
+}
+
+//--------------------------Process_OopMap_Node--------------------------------
+void Compile::Process_OopMap_Node(MachNode *mach, int current_offset) {
+
+ // Handle special safepoint nodes for synchronization
+ MachSafePointNode *sfn = mach->as_MachSafePoint();
+ MachCallNode *mcall;
+
+#ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( is_node_getting_a_safepoint(mach), "logic does not match; false negative");
+#endif
+
+ int safepoint_pc_offset = current_offset;
+
+ // Add the safepoint in the DebugInfoRecorder
+ if( !mach->is_MachCall() ) {
+ mcall = NULL;
+ debug_info()->add_safepoint(safepoint_pc_offset, sfn->_oop_map);
+ } else {
+ mcall = mach->as_MachCall();
+ safepoint_pc_offset += mcall->ret_addr_offset();
+ debug_info()->add_safepoint(safepoint_pc_offset, mcall->_oop_map);
+ }
+
+ // Loop over the JVMState list to add scope information
+ // Do not skip safepoints with a NULL method, they need monitor info
+ JVMState* youngest_jvms = sfn->jvms();
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from oldest to youngest.
+ for (int depth = 1; depth <= max_depth; depth++) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ int idx;
+ ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
+ // Safepoints that do not have method() set only provide oop-map and monitor info
+ // to support GC; these do not support deoptimization.
+ int num_locs = (method == NULL) ? 0 : jvms->loc_size();
+ int num_exps = (method == NULL) ? 0 : jvms->stk_size();
+ int num_mon = jvms->nof_monitors();
+ assert(method == NULL || jvms->bci() < 0 || num_locs == method->max_locals(),
+ "JVMS local count must match that of the method");
+
+ // Add Local and Expression Stack Information
+
+ // Insert locals into the locarray
+ GrowableArray<ScopeValue*> *locarray = new GrowableArray<ScopeValue*>(num_locs);
+ for( idx = 0; idx < num_locs; idx++ ) {
+ FillLocArray( idx, sfn->local(jvms, idx), locarray );
+ }
+
+ // Insert expression stack entries into the exparray
+ GrowableArray<ScopeValue*> *exparray = new GrowableArray<ScopeValue*>(num_exps);
+ for( idx = 0; idx < num_exps; idx++ ) {
+ FillLocArray( idx, sfn->stack(jvms, idx), exparray );
+ }
+
+ // Add in mappings of the monitors
+ assert( !method ||
+ !method->is_synchronized() ||
+ method->is_native() ||
+ num_mon > 0 ||
+ !GenerateSynchronizationCode,
+ "monitors must always exist for synchronized methods");
+
+ // Build the growable array of ScopeValues for exp stack
+ GrowableArray<MonitorValue*> *monarray = new GrowableArray<MonitorValue*>(num_mon);
+
+ // Loop over monitors and insert into array
+ for(idx = 0; idx < num_mon; idx++) {
+ // Grab the node that defines this monitor
+ Node* box_node;
+ Node* obj_node;
+ box_node = sfn->monitor_box(jvms, idx);
+ obj_node = sfn->monitor_obj(jvms, idx);
+
+ // Create ScopeValue for object
+ ScopeValue *scval = NULL;
+ if( !obj_node->is_Con() ) {
+ OptoReg::Name obj_reg = _regalloc->get_reg_first(obj_node);
+ scval = new_loc_value( _regalloc, obj_reg, Location::oop );
+ } else {
+ scval = new ConstantOopWriteValue(obj_node->bottom_type()->is_instptr()->const_oop()->encoding());
+ }
+
+ OptoReg::Name box_reg = BoxLockNode::stack_slot(box_node);
+ monarray->append(new MonitorValue(scval, Location::new_stk_loc(Location::normal,_regalloc->reg2offset(box_reg))));
+ }
+
+ // Build first class objects to pass to scope
+ DebugToken *locvals = debug_info()->create_scope_values(locarray);
+ DebugToken *expvals = debug_info()->create_scope_values(exparray);
+ DebugToken *monvals = debug_info()->create_monitor_values(monarray);
+
+ // Make method available for all Safepoints
+ ciMethod* scope_method = method ? method : _method;
+ // Describe the scope here
+ assert(jvms->bci() >= InvocationEntryBci && jvms->bci() <= 0x10000, "must be a valid or entry BCI");
+ debug_info()->describe_scope(safepoint_pc_offset,scope_method,jvms->bci(),locvals,expvals,monvals);
+ } // End jvms loop
+
+ // Mark the end of the scope set.
+ debug_info()->end_safepoint(safepoint_pc_offset);
+}
+
+
+
+// A simplified version of Process_OopMap_Node, to handle non-safepoints.
+class NonSafepointEmitter {
+ Compile* C;
+ JVMState* _pending_jvms;
+ int _pending_offset;
+
+ void emit_non_safepoint();
+
+ public:
+ NonSafepointEmitter(Compile* compile) {
+ this->C = compile;
+ _pending_jvms = NULL;
+ _pending_offset = 0;
+ }
+
+ void observe_instruction(Node* n, int pc_offset) {
+ if (!C->debug_info()->recording_non_safepoints()) return;
+
+ Node_Notes* nn = C->node_notes_at(n->_idx);
+ if (nn == NULL || nn->jvms() == NULL) return;
+ if (_pending_jvms != NULL &&
+ _pending_jvms->same_calls_as(nn->jvms())) {
+ // Repeated JVMS? Stretch it up here.
+ _pending_offset = pc_offset;
+ } else {
+ if (_pending_jvms != NULL &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ if (pc_offset > C->debug_info()->last_pc_offset()) {
+ // This is the only way _pending_jvms can become non-NULL:
+ _pending_jvms = nn->jvms();
+ _pending_offset = pc_offset;
+ }
+ }
+ }
+
+ // Stay out of the way of real safepoints:
+ void observe_safepoint(JVMState* jvms, int pc_offset) {
+ if (_pending_jvms != NULL &&
+ !_pending_jvms->same_calls_as(jvms) &&
+ _pending_offset < pc_offset) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ }
+
+ void flush_at_end() {
+ if (_pending_jvms != NULL) {
+ emit_non_safepoint();
+ }
+ _pending_jvms = NULL;
+ }
+};
+
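+// In effect, the emitter coalesces a run of consecutive instructions that
+// share the same JVMS into a single non-safepoint debug record at the pc of
+// the last instruction in the run; a change of JVMS or the end of the method
+// flushes the pending record, while a real safepoint supersedes it.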
+void NonSafepointEmitter::emit_non_safepoint() {
+ JVMState* youngest_jvms = _pending_jvms;
+ int pc_offset = _pending_offset;
+
+ // Clear it now:
+ _pending_jvms = NULL;
+
+ DebugInformationRecorder* debug_info = C->debug_info();
+ assert(debug_info->recording_non_safepoints(), "sanity");
+
+ debug_info->add_non_safepoint(pc_offset);
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from oldest to youngest.
+ for (int depth = 1; depth <= max_depth; depth++) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ ciMethod* method = jvms->has_method() ? jvms->method() : NULL;
+ debug_info->describe_scope(pc_offset, method, jvms->bci());
+ }
+
+ // Mark the end of the scope set.
+ debug_info->end_non_safepoint(pc_offset);
+}
+
+
+
+// helper for Fill_buffer bailout logic
+static void turn_off_compiler(Compile* C) {
+ if (CodeCache::unallocated_capacity() >= CodeCacheMinimumFreeSpace*10) {
+ // Do not turn off compilation if a single giant method has
+ // blown the code cache size.
+ C->record_failure("excessive request to CodeCache");
+ } else {
+ UseInterpreter = true;
+ UseCompiler = false;
+ AlwaysCompileLoopMethods = false;
+ C->record_failure("CodeCache is full");
+ warning("CodeCache is full. Compiling has been disabled");
+ }
+}
+
+
+//------------------------------Fill_buffer------------------------------------
+void Compile::Fill_buffer() {
+
+ // Set the initially allocated size
+ int code_req = initial_code_capacity;
+ int locs_req = initial_locs_capacity;
+ int stub_req = TraceJumps ? initial_stub_capacity * 10 : initial_stub_capacity;
+ int const_req = initial_const_capacity;
+ bool labels_not_set = true;
+
+ int pad_req = NativeCall::instruction_size;
+ // The extra spacing after the code is necessary on some platforms.
+ // Sometimes we need to patch in a jump after the last instruction,
+ // if the nmethod has been deoptimized. (See 4932387, 4894843.)
+
+ uint i;
+ // Compute the byte offset where we can store the deopt pc.
+ if (fixed_slots() != 0) {
+ _orig_pc_slot_offset_in_bytes = _regalloc->reg2offset(OptoReg::stack2reg(_orig_pc_slot));
+ }
+
+ // Compute prolog code size
+ _method_size = 0;
+ _frame_slots = OptoReg::reg2stack(_matcher->_old_SP)+_regalloc->_framesize;
+#ifdef IA64
+ if (save_argument_registers()) {
+ // 4815101: this is a stub with implicit and unknown precision fp args.
+ // The usual spill mechanism can only generate stfd's in this case, which
+ // doesn't work if the fp reg to spill contains a single-precision denorm.
+ // Instead, we hack around the normal spill mechanism using stfspill's and
+ // ldffill's in the MachProlog and MachEpilog emit methods. We allocate
+ // space here for the fp arg regs (f8-f15) we're going to thusly spill.
+ //
+ // If we ever implement 16-byte 'registers' == stack slots, we can
+ // get rid of this hack and have SpillCopy generate stfspill/ldffill
+ // instead of stfd/stfs/ldfd/ldfs.
+ _frame_slots += 8*(16/BytesPerInt);
+ }
+#endif
+ assert( _frame_slots >= 0 && _frame_slots < 1000000, "sanity check" );
+
+ // Create an array of unused labels, one for each basic block
+ Label *blk_labels = NEW_RESOURCE_ARRAY(Label, _cfg->_num_blocks+1);
+
+ for( i=0; i <= _cfg->_num_blocks; i++ ) {
+ blk_labels[i].init();
+ }
+
+ // If this machine supports different size branch offsets, then pre-compute
+ // the length of the blocks
+ if( _matcher->is_short_branch_offset(0) ) {
+ Shorten_branches(blk_labels, code_req, locs_req, stub_req, const_req);
+ labels_not_set = false;
+ }
+
+ // nmethod and CodeBuffer count stubs & constants as part of method's code.
+ int exception_handler_req = size_exception_handler();
+ int deopt_handler_req = size_deopt_handler();
+ exception_handler_req += MAX_stubs_size; // add marginal slop for handler
+ deopt_handler_req += MAX_stubs_size; // add marginal slop for handler
+ stub_req += MAX_stubs_size; // ensure per-stub margin
+ code_req += MAX_inst_size; // ensure per-instruction margin
+ if (StressCodeBuffers)
+ code_req = const_req = stub_req = exception_handler_req = deopt_handler_req = 0x10; // force expansion
+ int total_req = code_req + pad_req + stub_req + exception_handler_req + deopt_handler_req + const_req;
+ CodeBuffer* cb = code_buffer();
+ cb->initialize(total_req, locs_req);
+
+ // Have we run out of code space?
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+ // Configure the code buffer.
+ cb->initialize_consts_size(const_req);
+ cb->initialize_stubs_size(stub_req);
+ cb->initialize_oop_recorder(env()->oop_recorder());
+
+ // fill in the nop array for bundling computations
+ MachNode *_nop_list[Bundle::_nop_count];
+ Bundle::initialize_nops(_nop_list, this);
+
+ // Create oopmap set.
+ _oop_map_set = new OopMapSet();
+
+ // !!!!! This preserves old handling of oopmaps for now
+ debug_info()->set_oopmaps(_oop_map_set);
+
+ // Count and start of implicit null check instructions
+ uint inct_cnt = 0;
+ uint *inct_starts = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
+
+ // Count and start of calls
+ uint *call_returns = NEW_RESOURCE_ARRAY(uint, _cfg->_num_blocks+1);
+
+ uint return_offset = 0;
+ MachNode *nop = new (this) MachNopNode();
+
+ int previous_offset = 0;
+ int current_offset = 0;
+ int last_call_offset = -1;
+
+ // Create an array of node offsets, one for each node, if printing is enabled
+#ifndef PRODUCT
+ int *node_offsets = NULL;
+ uint node_offset_limit = unique();
+
+ if ( print_assembly() )
+ node_offsets = NEW_RESOURCE_ARRAY(int, node_offset_limit);
+#endif
+
+ NonSafepointEmitter non_safepoints(this); // emit non-safepoints lazily
+
+ // ------------------
+ // Now fill in the code buffer
+ Node *delay_slot = NULL;
+
+ for( i=0; i < _cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+
+ Node *head = b->head();
+
+ // If this block needs to start aligned (i.e., can be reached other
+ // than by falling-thru from the previous block), then force the
+ // start of a new bundle.
+ if( Pipeline::requires_bundling() && starts_bundle(head) )
+ cb->flush_bundle(true);
+
+ // Define the label at the beginning of the basic block
+ if( labels_not_set )
+ MacroAssembler(cb).bind( blk_labels[b->_pre_order] );
+ else
+ assert( blk_labels[b->_pre_order].loc_pos() == cb->code_size(),
+ "label position does not match code offset" );
+
+ uint last_inst = b->_nodes.size();
+
+ // Emit block normally, except for last instruction.
+ // Emit means "dump code bits into code buffer".
+ for( uint j = 0; j<last_inst; j++ ) {
+
+ // Get the node
+ Node* n = b->_nodes[j];
+
+ // See if delay slots are supported
+ if (valid_bundle_info(n) &&
+ node_bundling(n)->used_in_unconditional_delay()) {
+ assert(delay_slot == NULL, "no use of delay slot node");
+ assert(n->size(_regalloc) == Pipeline::instr_unit_size(), "delay slot instruction wrong size");
+
+ delay_slot = n;
+ continue;
+ }
+
+ // If this starts a new instruction group, then flush the current one
+ // (but allow split bundles)
+ if( Pipeline::requires_bundling() && starts_bundle(n) )
+ cb->flush_bundle(false);
+
+ // The following logic is duplicated in the code ifdeffed for
+ // ENABLE_ZAP_DEAD_LOCALS which appears above in this file. It
+ // should be factored out. Or maybe dispersed to the nodes?
+
+ // Special handling for SafePoint/Call Nodes
+ bool is_mcall = false;
+ if( n->is_Mach() ) {
+ MachNode *mach = n->as_Mach();
+ is_mcall = n->is_MachCall();
+ bool is_sfn = n->is_MachSafePoint();
+
+ // If this requires all previous instructions be flushed, then do so
+ if( is_sfn || is_mcall || mach->alignment_required() != 1) {
+ cb->flush_bundle(true);
+ current_offset = cb->code_size();
+ }
+
+ // align the instruction if necessary
+ int nop_size = nop->size(_regalloc);
+ int padding = mach->compute_padding(current_offset);
+ // Make sure safepoint node for polling is distinct from a call's
+ // return by adding a nop if needed.
+ if (is_sfn && !is_mcall && padding == 0 && current_offset == last_call_offset ) {
+ padding = nop_size;
+ }
+ assert( labels_not_set || padding == 0, "instruction should already be aligned");
+
+ if(padding > 0) {
+ assert((padding % nop_size) == 0, "padding is not a multiple of NOP size");
+ int nops_cnt = padding / nop_size;
+ MachNode *nop = new (this) MachNopNode(nops_cnt);
+ b->_nodes.insert(j++, nop);
+ last_inst++;
+ _cfg->_bbs.map( nop->_idx, b );
+ nop->emit(*cb, _regalloc);
+ cb->flush_bundle(true);
+ current_offset = cb->code_size();
+ }
+
+ // Remember the start of the last call in a basic block
+ if (is_mcall) {
+ MachCallNode *mcall = mach->as_MachCall();
+
+ // This destination address is NOT PC-relative
+ mcall->method_set((intptr_t)mcall->entry_point());
+
+ // Save the return address
+ call_returns[b->_pre_order] = current_offset + mcall->ret_addr_offset();
+
+ if (!mcall->is_safepoint_node()) {
+ is_mcall = false;
+ is_sfn = false;
+ }
+ }
+
+ // Because MachCall inherits from MachSafePoint, sfn is valid whenever mcall is
+ if( is_sfn || is_mcall ) {
+
+ // Handle special safepoint nodes for synchronization
+ if( !is_mcall ) {
+ MachSafePointNode *sfn = mach->as_MachSafePoint();
+ // !!!!! Stubs only need an oopmap right now, so bail out
+ if( sfn->jvms()->method() == NULL) {
+ // Write the oopmap directly to the code blob??!!
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( !is_node_getting_a_safepoint(sfn), "logic does not match; false positive");
+# endif
+ continue;
+ }
+ } // End synchronization
+
+ non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
+ current_offset);
+ Process_OopMap_Node(mach, current_offset);
+ } // End if safepoint
+
+ // If this is a null check, then add the start of the previous instruction to the list
+ else if( mach->is_MachNullCheck() ) {
+ inct_starts[inct_cnt++] = previous_offset;
+ }
+
+ // If this is a branch, then fill in the label with the target BB's label
+ else if ( mach->is_Branch() ) {
+
+ if ( mach->ideal_Opcode() == Op_Jump ) {
+ for (uint h = 0; h < b->_num_succs; h++ ) {
+ Block* succs_block = b->_succs[h];
+ for (uint j = 1; j < succs_block->num_preds(); j++) {
+ Node* jpn = succs_block->pred(j);
+ if ( jpn->is_JumpProj() && jpn->in(0) == mach ) {
+ uint block_num = succs_block->non_connector()->_pre_order;
+ Label *blkLabel = &blk_labels[block_num];
+ mach->add_case_label(jpn->as_JumpProj()->proj_no(), blkLabel);
+ }
+ }
+ }
+ } else {
+ // For Branches
+ // This requires the TRUE branch target be in succs[0]
+ uint block_num = b->non_connector_successor(0)->_pre_order;
+ mach->label_set( blk_labels[block_num], block_num );
+ }
+ }
+
+#ifdef ASSERT
+ // Check that oop-store precedes the card-mark
+ else if( mach->ideal_Opcode() == Op_StoreCM ) {
+ uint storeCM_idx = j;
+ Node *oop_store = mach->in(mach->_cnt); // First precedence edge
+ assert( oop_store != NULL, "storeCM expects a precedence edge");
+ uint i4;
+ for( i4 = 0; i4 < last_inst; ++i4 ) {
+ if( b->_nodes[i4] == oop_store ) break;
+ }
+ // Note: This test can provide a false failure if other precedence
+ // edges have been added to the storeCMNode.
+ assert( i4 == last_inst || i4 < storeCM_idx, "CM card-mark executes before oop-store");
+ }
+#endif
+
+ else if( !n->is_Proj() ) {
+ // Remember the beginning of the previous instruction, in case
+ // it's followed by a flag-kill and a null-check. Happens on
+ // Intel all the time, with add-to-memory kind of opcodes.
+ previous_offset = current_offset;
+ }
+ }
+
+ // Make sure there is sufficient space remaining, expanding the buffer if needed
+ cb->insts()->maybe_expand_to_ensure_remaining(MAX_inst_size);
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+
+ // Save the offset for the listing
+#ifndef PRODUCT
+ if( node_offsets && n->_idx < node_offset_limit )
+ node_offsets[n->_idx] = cb->code_size();
+#endif
+
+ // "Normal" instruction case
+ n->emit(*cb, _regalloc);
+ current_offset = cb->code_size();
+ non_safepoints.observe_instruction(n, current_offset);
+
+ // mcall is last "call" that can be a safepoint
+ // record it so we can see if a poll will directly follow it
+ // in which case we'll need a pad to make the PcDesc sites unique
+ // see 5010568. This can be slightly inaccurate but conservative
+ // in the case that return address is not actually at current_offset.
+ // This is a small price to pay.
+
+ if (is_mcall) {
+ last_call_offset = current_offset;
+ }
+
+ // See if this instruction has a delay slot
+ if ( valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
+ assert(delay_slot != NULL, "expecting delay slot node");
+
+ // Back up 1 instruction
+ cb->set_code_end(
+ cb->code_end()-Pipeline::instr_unit_size());
+
+ // Save the offset for the listing
+#ifndef PRODUCT
+ if( node_offsets && delay_slot->_idx < node_offset_limit )
+ node_offsets[delay_slot->_idx] = cb->code_size();
+#endif
+
+ // Support a SafePoint in the delay slot
+ if( delay_slot->is_MachSafePoint() ) {
+ MachNode *mach = delay_slot->as_Mach();
+ // !!!!! Stubs only need an oopmap right now, so bail out
+ if( !mach->is_MachCall() && mach->as_MachSafePoint()->jvms()->method() == NULL ) {
+ // Write the oopmap directly to the code blob??!!
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ assert( !is_node_getting_a_safepoint(mach), "logic does not match; false positive");
+# endif
+ delay_slot = NULL;
+ continue;
+ }
+
+ int adjusted_offset = current_offset - Pipeline::instr_unit_size();
+ non_safepoints.observe_safepoint(mach->as_MachSafePoint()->jvms(),
+ adjusted_offset);
+ // Generate an OopMap entry
+ Process_OopMap_Node(mach, adjusted_offset);
+ }
+
+ // Insert the delay slot instruction
+ delay_slot->emit(*cb, _regalloc);
+
+ // Don't reuse it
+ delay_slot = NULL;
+ }
+
+ } // End for all instructions in block
+
+ // If the next block _starts_ a loop, pad this block out to align
+ // the loop start a little. Helps prevent pipe stalls at loop starts
+ int nop_size = (new (this) MachNopNode())->size(_regalloc);
+ if( i<_cfg->_num_blocks-1 ) {
+ Block *nb = _cfg->_blocks[i+1];
+ uint padding = nb->alignment_padding(current_offset);
+ if( padding > 0 ) {
+ MachNode *nop = new (this) MachNopNode(padding / nop_size);
+ b->_nodes.insert( b->_nodes.size(), nop );
+ _cfg->_bbs.map( nop->_idx, b );
+ nop->emit(*cb, _regalloc);
+ current_offset = cb->code_size();
+ }
+ }
+
+ } // End of for all blocks
+
+ non_safepoints.flush_at_end();
+
+ // Offset too large?
+ if (failing()) return;
+
+ // Define a pseudo-label at the end of the code
+ MacroAssembler(cb).bind( blk_labels[_cfg->_num_blocks] );
+
+ // Compute the size of the first block
+ _first_block_size = blk_labels[1].loc_pos() - blk_labels[0].loc_pos();
+
+ assert(cb->code_size() < 500000, "method is unreasonably large");
+
+ // ------------------
+
+#ifndef PRODUCT
+ // Information on the size of the method, without the extraneous code
+ Scheduling::increment_method_size(cb->code_size());
+#endif
+
+ // ------------------
+ // Fill in exception table entries.
+ FillExceptionTables(inct_cnt, call_returns, inct_starts, blk_labels);
+
+ // Only java methods have exception handlers and deopt handlers
+ if (_method) {
+ // Emit the exception handler code.
+ _code_offsets.set_value(CodeOffsets::Exceptions, emit_exception_handler(*cb));
+ // Emit the deopt handler code.
+ _code_offsets.set_value(CodeOffsets::Deopt, emit_deopt_handler(*cb));
+ }
+
+ // One last check for failed CodeBuffer::expand:
+ if (cb->blob() == NULL) {
+ turn_off_compiler(this);
+ return;
+ }
+
+#ifndef PRODUCT
+ // Dump the assembly code, including basic-block numbers
+ if (print_assembly()) {
+ ttyLocker ttyl; // keep the following output all in one block
+ if (!VMThread::should_terminate()) { // test this under the tty lock
+ // This output goes directly to the tty, not the compiler log.
+ // To enable tools to match it up with the compilation activity,
+ // be sure to tag this tty output with the compile ID.
+ if (xtty != NULL) {
+ xtty->head("opto_assembly compile_id='%d'%s", compile_id(),
+ is_osr_compilation() ? " compile_kind='osr'" :
+ "");
+ }
+ if (method() != NULL) {
+ method()->print_oop();
+ print_codes();
+ }
+ dump_asm(node_offsets, node_offset_limit);
+ if (xtty != NULL) {
+ xtty->tail("opto_assembly");
+ }
+ }
+ }
+#endif
+
+}
+
+void Compile::FillExceptionTables(uint cnt, uint *call_returns, uint *inct_starts, Label *blk_labels) {
+ _inc_table.set_size(cnt);
+
+ uint inct_cnt = 0;
+ for( uint i=0; i<_cfg->_num_blocks; i++ ) {
+ Block *b = _cfg->_blocks[i];
+ Node *n = NULL;
+ int j;
+
+ // Find the branch; ignore trailing NOPs.
+ for( j = b->_nodes.size()-1; j>=0; j-- ) {
+ n = b->_nodes[j];
+ if( !n->is_Mach() || n->as_Mach()->ideal_Opcode() != Op_Con )
+ break;
+ }
+
+ // If we didn't find anything, continue
+ if( j < 0 ) continue;
+
+ // Compute ExceptionHandlerTable subtable entry and add it
+ // (skip empty blocks)
+ if( n->is_Catch() ) {
+
+ // Get the offset of the return from the call
+ uint call_return = call_returns[b->_pre_order];
+#ifdef ASSERT
+ assert( call_return > 0, "no call seen for this basic block" );
+ while( b->_nodes[--j]->Opcode() == Op_MachProj ) ;
+ assert( b->_nodes[j]->is_Call(), "CatchProj must follow call" );
+#endif
+ // The last instruction is a CatchNode; find its CatchProjNodes
+ int nof_succs = b->_num_succs;
+ // allocate space
+ GrowableArray<intptr_t> handler_bcis(nof_succs);
+ GrowableArray<intptr_t> handler_pcos(nof_succs);
+ // iterate through all successors
+ for (int j = 0; j < nof_succs; j++) {
+ Block* s = b->_succs[j];
+ bool found_p = false;
+ for( uint k = 1; k < s->num_preds(); k++ ) {
+ Node *pk = s->pred(k);
+ if( pk->is_CatchProj() && pk->in(0) == n ) {
+ const CatchProjNode* p = pk->as_CatchProj();
+ found_p = true;
+ // add the corresponding handler bci & pco information
+ if( p->_con != CatchProjNode::fall_through_index ) {
+ // p leads to an exception handler (and is not fall through)
+ assert(s == _cfg->_blocks[s->_pre_order],"bad numbering");
+ // no duplicates, please
+ if( !handler_bcis.contains(p->handler_bci()) ) {
+ uint block_num = s->non_connector()->_pre_order;
+ handler_bcis.append(p->handler_bci());
+ handler_pcos.append(blk_labels[block_num].loc_pos());
+ }
+ }
+ }
+ }
+ assert(found_p, "no matching predecessor found");
+ // Note: Due to empty block removal, one block may have
+ // several CatchProj inputs, from the same Catch.
+ }
+
+ // Set the offset of the return from the call
+ _handler_table.add_subtable(call_return, &handler_bcis, NULL, &handler_pcos);
+ continue;
+ }
+
+ // Handle implicit null exception table updates
+ if( n->is_MachNullCheck() ) {
+ uint block_num = b->non_connector_successor(0)->_pre_order;
+ _inc_table.append( inct_starts[inct_cnt++], blk_labels[block_num].loc_pos() );
+ continue;
+ }
+ } // End of for all blocks fill in exception table entries
+}
+
+// Static Variables
+#ifndef PRODUCT
+uint Scheduling::_total_nop_size = 0;
+uint Scheduling::_total_method_size = 0;
+uint Scheduling::_total_branches = 0;
+uint Scheduling::_total_unconditional_delays = 0;
+uint Scheduling::_total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
+#endif
+
+// Initializer for class Scheduling
+
+Scheduling::Scheduling(Arena *arena, Compile &compile)
+ : _arena(arena),
+ _cfg(compile.cfg()),
+ _bbs(compile.cfg()->_bbs),
+ _regalloc(compile.regalloc()),
+ _reg_node(arena),
+ _bundle_instr_count(0),
+ _bundle_cycle_number(0),
+ _scheduled(arena),
+ _available(arena),
+ _next_node(NULL),
+ _bundle_use(0, 0, resource_count, &_bundle_use_elements[0]),
+ _pinch_free_list(arena)
+#ifndef PRODUCT
+ , _branches(0)
+ , _unconditional_delays(0)
+#endif
+{
+ // Create a MachNopNode
+ _nop = new (&compile) MachNopNode();
+
+ // Now that the nops are in the array, save the count
+ // (but allow entries for the nops)
+ _node_bundling_limit = compile.unique();
+ uint node_max = _regalloc->node_regs_max_index();
+
+ compile.set_node_bundling_limit(_node_bundling_limit);
+
+ // This one is persistent within the Compile class
+ _node_bundling_base = NEW_ARENA_ARRAY(compile.comp_arena(), Bundle, node_max);
+
+ // Allocate space for fixed-size arrays
+ _node_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
+ _uses = NEW_ARENA_ARRAY(arena, short, node_max);
+ _current_latency = NEW_ARENA_ARRAY(arena, unsigned short, node_max);
+
+ // Clear the arrays
+ memset(_node_bundling_base, 0, node_max * sizeof(Bundle));
+ memset(_node_latency, 0, node_max * sizeof(unsigned short));
+ memset(_uses, 0, node_max * sizeof(short));
+ memset(_current_latency, 0, node_max * sizeof(unsigned short));
+
+ // Clear the bundling information
+ memcpy(_bundle_use_elements,
+ Pipeline_Use::elaborated_elements,
+ sizeof(Pipeline_Use::elaborated_elements));
+
+ // Get the last node
+ Block *bb = _cfg->_blocks[_cfg->_blocks.size()-1];
+
+ _next_node = bb->_nodes[bb->_nodes.size()-1];
+}
+
+#ifndef PRODUCT
+// Scheduling destructor
+Scheduling::~Scheduling() {
+ _total_branches += _branches;
+ _total_unconditional_delays += _unconditional_delays;
+}
+#endif
+
+// Step ahead "i" cycles
+void Scheduling::step(uint i) {
+
+ Bundle *bundle = node_bundling(_next_node);
+ bundle->set_starts_bundle();
+
+ // Update the bundle record, but leave the flags information alone
+ if (_bundle_instr_count > 0) {
+ bundle->set_instr_count(_bundle_instr_count);
+ bundle->set_resources_used(_bundle_use.resourcesUsed());
+ }
+
+ // Update the state information
+ _bundle_instr_count = 0;
+ _bundle_cycle_number += i;
+ _bundle_use.step(i);
+}
+
+void Scheduling::step_and_clear() {
+ Bundle *bundle = node_bundling(_next_node);
+ bundle->set_starts_bundle();
+
+ // Update the bundle record
+ if (_bundle_instr_count > 0) {
+ bundle->set_instr_count(_bundle_instr_count);
+ bundle->set_resources_used(_bundle_use.resourcesUsed());
+
+ _bundle_cycle_number += 1;
+ }
+
+ // Clear the bundling information
+ _bundle_instr_count = 0;
+ _bundle_use.reset();
+
+ memcpy(_bundle_use_elements,
+ Pipeline_Use::elaborated_elements,
+ sizeof(Pipeline_Use::elaborated_elements));
+}
+
+//------------------------------ScheduleAndBundle------------------------------
+// Perform instruction scheduling and bundling over the sequence of
+// instructions in backwards order.
+void Compile::ScheduleAndBundle() {
+
+ // Don't optimize this if it isn't a method
+ if (!_method)
+ return;
+
+ // Don't optimize this if scheduling is disabled
+ if (!do_scheduling())
+ return;
+
+ NOT_PRODUCT( TracePhase t2("isched", &_t_instrSched, TimeCompiler); )
+
+ // Create a data structure for all the scheduling information
+ Scheduling scheduling(Thread::current()->resource_area(), *this);
+
+ // Walk backwards over each basic block, computing the needed alignment
+ // Walk over all the basic blocks
+ scheduling.DoScheduling();
+}
+
+//------------------------------ComputeLocalLatenciesForward-------------------
+// Compute the latency of all the instructions. This is fairly simple,
+// because we already have a legal ordering. Walk over the instructions
+// from first to last, and compute the latency of the instruction based
+ // on the latency of the preceding instruction(s).
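+// A minimal worked example (with made-up latencies, not taken from any real
+// machine description): if the defs feeding a node carry latencies 3 and 2 and
+// the per-input latencies use->latency(k) are both 1, the node's latency
+// becomes max(1, 3+1, 2+1) = 4; a node with no inputs keeps the floor value 1.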
+void Scheduling::ComputeLocalLatenciesForward(const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> ComputeLocalLatenciesForward\n");
+#endif
+
+ // Walk over all the schedulable instructions
+ for( uint j=_bb_start; j < _bb_end; j++ ) {
+
+ // This is a kludge, forcing all latency calculations to start at 1.
+ // Used to allow latency 0 to force an instruction to the beginning
+ // of the bb
+ uint latency = 1;
+ Node *use = bb->_nodes[j];
+ uint nlen = use->len();
+
+ // Walk over all the inputs
+ for ( uint k=0; k < nlen; k++ ) {
+ Node *def = use->in(k);
+ if (!def)
+ continue;
+
+ uint l = _node_latency[def->_idx] + use->latency(k);
+ if (latency < l)
+ latency = l;
+ }
+
+ _node_latency[use->_idx] = latency;
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# latency %4d: ", latency);
+ use->dump();
+ }
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- ComputeLocalLatenciesForward\n");
+#endif
+
+} // end ComputeLocalLatenciesForward
+
+// See if this node fits into the present instruction bundle
+bool Scheduling::NodeFitsInBundle(Node *n) {
+ uint n_idx = n->_idx;
+
+ // If this is the unconditional delay instruction, then it fits
+ if (n == _unconditional_delay_slot) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: TRUE; is in unconditional delay slot\n", n->_idx);
+#endif
+ return (true);
+ }
+
+ // If the node cannot be scheduled this cycle, skip it
+ if (_current_latency[n_idx] > _bundle_cycle_number) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; latency %4d > %d\n",
+ n->_idx, _current_latency[n_idx], _bundle_cycle_number);
+#endif
+ return (false);
+ }
+
+ const Pipeline *node_pipeline = n->pipeline();
+
+ uint instruction_count = node_pipeline->instructionCount();
+ if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
+ instruction_count = 0;
+ else if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
+ instruction_count++;
+
+ if (_bundle_instr_count + instruction_count > Pipeline::_max_instrs_per_cycle) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; too many instructions: %d > %d\n",
+ n->_idx, _bundle_instr_count + instruction_count, Pipeline::_max_instrs_per_cycle);
+#endif
+ return (false);
+ }
+
+ // Don't allow non-machine nodes to be handled this way
+ if (!n->is_Mach() && instruction_count == 0)
+ return (false);
+
+ // See if there is any overlap
+ uint delay = _bundle_use.full_latency(0, node_pipeline->resourceUse());
+
+ if (delay > 0) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: FALSE; functional units overlap\n", n_idx);
+#endif
+ return false;
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# NodeFitsInBundle [%4d]: TRUE\n", n_idx);
+#endif
+
+ return true;
+}
+
+Node * Scheduling::ChooseNodeToBundle() {
+ uint siz = _available.size();
+
+ if (siz == 0) {
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# ChooseNodeToBundle: NULL\n");
+#endif
+ return (NULL);
+ }
+
+ // Fast path, if only 1 instruction in the bundle
+ if (siz == 1) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle (only 1): ");
+ _available[0]->dump();
+ }
+#endif
+ return (_available[0]);
+ }
+
+ // Don't bother, if the bundle is already full
+ if (_bundle_instr_count < Pipeline::_max_instrs_per_cycle) {
+ for ( uint i = 0; i < siz; i++ ) {
+ Node *n = _available[i];
+
+ // Skip projections, we'll handle them another way
+ if (n->is_Proj())
+ continue;
+
+ // This presupposes that instructions are inserted into the
+ // available list in a legality order; i.e., instructions that
+ // must be inserted first are at the head of the list
+ if (NodeFitsInBundle(n)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle: ");
+ n->dump();
+ }
+#endif
+ return (n);
+ }
+ }
+ }
+
+ // Nothing fits in this bundle, choose the highest priority
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# ChooseNodeToBundle: ");
+ _available[0]->dump();
+ }
+#endif
+
+ return _available[0];
+}
+
+//------------------------------AddNodeToAvailableList-------------------------
+void Scheduling::AddNodeToAvailableList(Node *n) {
+ assert( !n->is_Proj(), "projections never directly made available" );
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# AddNodeToAvailableList: ");
+ n->dump();
+ }
+#endif
+
+ int latency = _current_latency[n->_idx];
+
+ // Insert in latency order (insertion sort)
+ uint i;
+ for ( i=0; i < _available.size(); i++ )
+ if (_current_latency[_available[i]->_idx] > latency)
+ break;
+
+ // Special Check for compares following branches
+ if( n->is_Mach() && _scheduled.size() > 0 ) {
+ int op = n->as_Mach()->ideal_Opcode();
+ Node *last = _scheduled[0];
+ if( last->is_MachIf() && last->in(1) == n &&
+ ( op == Op_CmpI ||
+ op == Op_CmpU ||
+ op == Op_CmpP ||
+ op == Op_CmpF ||
+ op == Op_CmpD ||
+ op == Op_CmpL ) ) {
+
+ // Recalculate position, moving to front of same latency
+ for ( i=0 ; i < _available.size(); i++ )
+ if (_current_latency[_available[i]->_idx] >= latency)
+ break;
+ }
+ }
+
+ // Insert the node in the available list
+ _available.insert(i, n);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ dump_available();
+#endif
+}
+
+//------------------------------DecrementUseCounts-----------------------------
+void Scheduling::DecrementUseCounts(Node *n, const Block *bb) {
+ for ( uint i=0; i < n->len(); i++ ) {
+ Node *def = n->in(i);
+ if (!def) continue;
+ if( def->is_Proj() ) // If this is a machine projection, then
+ def = def->in(0); // propagate usage thru to the base instruction
+
+ if( _bbs[def->_idx] != bb ) // Ignore if not block-local
+ continue;
+
+ // Compute the latency
+ uint l = _bundle_cycle_number + n->latency(i);
+ if (_current_latency[def->_idx] < l)
+ _current_latency[def->_idx] = l;
+
+ // If this does not have uses then schedule it
+ if ((--_uses[def->_idx]) == 0)
+ AddNodeToAvailableList(def);
+ }
+}
+
+//------------------------------AddNodeToBundle--------------------------------
+void Scheduling::AddNodeToBundle(Node *n, const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# AddNodeToBundle: ");
+ n->dump();
+ }
+#endif
+
+ // Remove this from the available list
+ uint i;
+ for (i = 0; i < _available.size(); i++)
+ if (_available[i] == n)
+ break;
+ assert(i < _available.size(), "entry in _available list not found");
+ _available.remove(i);
+
+ // See if this fits in the current bundle
+ const Pipeline *node_pipeline = n->pipeline();
+ const Pipeline_Use& node_usage = node_pipeline->resourceUse();
+
+ // Check for instructions to be placed in the delay slot. We
+ // do this before we actually schedule the current instruction,
+ // because the delay slot follows the current instruction.
+ if (Pipeline::_branch_has_delay_slot &&
+ node_pipeline->hasBranchDelay() &&
+ !_unconditional_delay_slot) {
+
+ uint siz = _available.size();
+
+ // Conditional branches can support an instruction that
+ // is unconditionally executed and not dependent on the
+ // branch, OR a conditionally executed instruction if
+ // the branch is taken. In practice, this means that
+ // the first instruction at the branch target is
+ // copied to the delay slot, and the branch goes to
+ // the instruction after that at the branch target
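+ // (For context: this mirrors classic branch-delay-slot semantics, as on
+ // SPARC, where the instruction placed immediately after a branch is
+ // executed before control actually transfers, so an independent
+ // instruction can hide part of the branch latency.)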
+ if ( n->is_Mach() && n->is_Branch() ) {
+
+ assert( !n->is_MachNullCheck(), "should not look for delay slot for Null Check" );
+ assert( !n->is_Catch(), "should not look for delay slot for Catch" );
+
+#ifndef PRODUCT
+ _branches++;
+#endif
+
+ // Look for at least 1 instruction on the available list
+ // that is not dependent on the branch
+ for (uint i = 0; i < siz; i++) {
+ Node *d = _available[i];
+ const Pipeline *avail_pipeline = d->pipeline();
+
+ // Don't allow safepoints in the branch shadow; that would
+ // cause a number of difficulties
+ if ( avail_pipeline->instructionCount() == 1 &&
+ !avail_pipeline->hasMultipleBundles() &&
+ !avail_pipeline->hasBranchDelay() &&
+ Pipeline::instr_has_unit_size() &&
+ d->size(_regalloc) == Pipeline::instr_unit_size() &&
+ NodeFitsInBundle(d) &&
+ !node_bundling(d)->used_in_delay()) {
+
+ if (d->is_Mach() && !d->is_MachSafePoint()) {
+ // A node that fits in the delay slot was found, so we need to
+ // set the appropriate bits in the bundle pipeline information so
+ // that it correctly indicates resource usage. Later, when we
+ // attempt to add this instruction to the bundle, we will skip
+ // setting the resource usage.
+ _unconditional_delay_slot = d;
+ node_bundling(n)->set_use_unconditional_delay();
+ node_bundling(d)->set_used_in_unconditional_delay();
+ _bundle_use.add_usage(avail_pipeline->resourceUse());
+ _current_latency[d->_idx] = _bundle_cycle_number;
+ _next_node = d;
+ ++_bundle_instr_count;
+#ifndef PRODUCT
+ _unconditional_delays++;
+#endif
+ break;
+ }
+ }
+ }
+ }
+
+ // No delay slot, add a nop to the usage
+ if (!_unconditional_delay_slot) {
+ // See if adding an instruction in the delay slot will overflow
+ // the bundle.
+ if (!NodeFitsInBundle(_nop)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(1 instruction for delay slot) ***\n");
+#endif
+ step(1);
+ }
+
+ _bundle_use.add_usage(_nop->pipeline()->resourceUse());
+ _next_node = _nop;
+ ++_bundle_instr_count;
+ }
+
+ // See if the instruction in the delay slot requires a
+ // step of the bundles
+ if (!NodeFitsInBundle(n)) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(branch won't fit) ***\n");
+#endif
+ // Update the state information
+ _bundle_instr_count = 0;
+ _bundle_cycle_number += 1;
+ _bundle_use.step(1);
+ }
+ }
+
+ // Get the number of instructions
+ uint instruction_count = node_pipeline->instructionCount();
+ if (node_pipeline->mayHaveNoCode() && n->size(_regalloc) == 0)
+ instruction_count = 0;
+
+ // Compute the latency information
+ uint delay = 0;
+
+ if (instruction_count > 0 || !node_pipeline->mayHaveNoCode()) {
+ int relative_latency = _current_latency[n->_idx] - _bundle_cycle_number;
+ if (relative_latency < 0)
+ relative_latency = 0;
+
+ delay = _bundle_use.full_latency(relative_latency, node_usage);
+
+ // Does not fit in this bundle, start a new one
+ if (delay > 0) {
+ step(delay);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(%d) ***\n", delay);
+#endif
+ }
+ }
+
+ // If this was placed in the delay slot, ignore it
+ if (n != _unconditional_delay_slot) {
+
+ if (delay == 0) {
+ if (node_pipeline->hasMultipleBundles()) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(multiple instructions) ***\n");
+#endif
+ step(1);
+ }
+
+ else if (instruction_count + _bundle_instr_count > Pipeline::_max_instrs_per_cycle) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# *** STEP(%d >= %d instructions) ***\n",
+ instruction_count + _bundle_instr_count,
+ Pipeline::_max_instrs_per_cycle);
+#endif
+ step(1);
+ }
+ }
+
+ if (node_pipeline->hasBranchDelay() && !_unconditional_delay_slot)
+ _bundle_instr_count++;
+
+ // Set the node's latency
+ _current_latency[n->_idx] = _bundle_cycle_number;
+
+ // Now merge the functional unit information
+ if (instruction_count > 0 || !node_pipeline->mayHaveNoCode())
+ _bundle_use.add_usage(node_usage);
+
+ // Increment the number of instructions in this bundle
+ _bundle_instr_count += instruction_count;
+
+ // Remember this node for later
+ if (n->is_Mach())
+ _next_node = n;
+ }
+
+ // It's possible to have a BoxLock in the graph and in the _bbs mapping but
+ // not in the bb->_nodes array. This happens for debug-info-only BoxLocks.
+ // 'Schedule' them (basically ignore in the schedule) but do not insert them
+ // into the block. All other scheduled nodes get put in the schedule here.
+ int op = n->Opcode();
+ if( (op == Op_Node && n->req() == 0) || // anti-dependence node OR
+ (op != Op_Node && // Not an unused antidependence node and
+ // not an unallocated boxlock
+ (OptoReg::is_valid(_regalloc->get_reg_first(n)) || op != Op_BoxLock)) ) {
+
+ // Push any trailing projections
+ if( bb->_nodes[bb->_nodes.size()-1] != n ) {
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *foi = n->fast_out(i);
+ if( foi->is_Proj() )
+ _scheduled.push(foi);
+ }
+ }
+
+ // Put the instruction in the schedule list
+ _scheduled.push(n);
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ dump_available();
+#endif
+
+ // Walk all the definitions, decrementing use counts, and
+ // if a definition has a 0 use count, place it in the available list.
+ DecrementUseCounts(n,bb);
+}
+
+//------------------------------ComputeUseCount--------------------------------
+// This method sets the use count within a basic block. We will ignore all
+// uses outside the current basic block. As we are doing a backwards walk,
+// any node we reach that has a use count of 0 may be scheduled. This also
+// avoids the problem of cyclic references from phi nodes, as long as phi
+// nodes are at the front of the basic block. This method also initializes
+// the available list to the set of instructions that have no uses within this
+// basic block.
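+// For illustration (a hypothetical block): given the block-local chain
+//   load -> add (uses the load) -> store (uses the add),
+// the backwards walk leaves the store with a use count of 0, so it seeds the
+// available list; the add and the load become available only after their
+// block-local users are scheduled and DecrementUseCounts drops them to zero.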
+void Scheduling::ComputeUseCount(const Block *bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> ComputeUseCount\n");
+#endif
+
+ // Clear the list of available and scheduled instructions, just in case
+ _available.clear();
+ _scheduled.clear();
+
+ // No delay slot specified
+ _unconditional_delay_slot = NULL;
+
+#ifdef ASSERT
+ for( uint i=0; i < bb->_nodes.size(); i++ )
+ assert( _uses[bb->_nodes[i]->_idx] == 0, "_use array not clean" );
+#endif
+
+ // Force the _uses count to never go to zero for unschedulable pieces
+ // of the block
+ for( uint k = 0; k < _bb_start; k++ )
+ _uses[bb->_nodes[k]->_idx] = 1;
+ for( uint l = _bb_end; l < bb->_nodes.size(); l++ )
+ _uses[bb->_nodes[l]->_idx] = 1;
+
+ // Iterate backwards over the instructions in the block. Don't count the
+ // branch projections at end or the block header instructions.
+ for( uint j = _bb_end-1; j >= _bb_start; j-- ) {
+ Node *n = bb->_nodes[j];
+ if( n->is_Proj() ) continue; // Projections handled another way
+
+ // Account for all uses
+ for ( uint k = 0; k < n->len(); k++ ) {
+ Node *inp = n->in(k);
+ if (!inp) continue;
+ assert(inp != n, "no cycles allowed" );
+ if( _bbs[inp->_idx] == bb ) { // Block-local use?
+ if( inp->is_Proj() ) // Skip through Proj's
+ inp = inp->in(0);
+ ++_uses[inp->_idx]; // Count 1 block-local use
+ }
+ }
+
+ // If this instruction has a 0 use count, then it is available
+ if (!_uses[n->_idx]) {
+ _current_latency[n->_idx] = _bundle_cycle_number;
+ AddNodeToAvailableList(n);
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# uses: %3d: ", _uses[n->_idx]);
+ n->dump();
+ }
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- ComputeUseCount\n");
+#endif
+}
+
+// This routine performs scheduling on each basic block in reverse order,
+// using instruction latencies and taking into account function unit
+// availability.
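+// Per block the flow is: compute register antidependencies, compute forward
+// latencies, compute block-local use counts (which seeds the available list),
+// then repeatedly pick a node via ChooseNodeToBundle and place it with
+// AddNodeToBundle until the available list is empty, and finally copy the
+// schedule (built in reverse) back into the block.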
+void Scheduling::DoScheduling() {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# -> DoScheduling\n");
+#endif
+
+ Block *succ_bb = NULL;
+ Block *bb;
+
+ // Walk over all the basic blocks in reverse order
+ for( int i=_cfg->_num_blocks-1; i >= 0; succ_bb = bb, i-- ) {
+ bb = _cfg->_blocks[i];
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# Schedule BB#%03d (initial)\n", i);
+ for (uint j = 0; j < bb->_nodes.size(); j++)
+ bb->_nodes[j]->dump();
+ }
+#endif
+
+ // On the head node, skip processing
+ if( bb == _cfg->_broot )
+ continue;
+
+ // Skip empty, connector blocks
+ if (bb->is_connector())
+ continue;
+
+ // If the following block is not the sole successor of
+ // this one, then reset the pipeline information
+ if (bb->_num_succs != 1 || bb->non_connector_successor(0) != succ_bb) {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("*** bundle start of next BB, node %d, for %d instructions\n",
+ _next_node->_idx, _bundle_instr_count);
+ }
+#endif
+ step_and_clear();
+ }
+
+ // Leave untouched the starting instruction, any Phis, a CreateEx node
+ // or Top. bb->_nodes[_bb_start] is the first schedulable instruction.
+ _bb_end = bb->_nodes.size()-1;
+ for( _bb_start=1; _bb_start <= _bb_end; _bb_start++ ) {
+ Node *n = bb->_nodes[_bb_start];
+ // Things not matched, like PhiNodes and ProjNodes, don't get scheduled.
+ // Also, MachIdealNodes do not get scheduled
+ if( !n->is_Mach() ) continue; // Skip non-machine nodes
+ MachNode *mach = n->as_Mach();
+ int iop = mach->ideal_Opcode();
+ if( iop == Op_CreateEx ) continue; // CreateEx is pinned
+ if( iop == Op_Con ) continue; // Do not schedule Top
+ if( iop == Op_Node && // Do not schedule PhiNodes, ProjNodes
+ mach->pipeline() == MachNode::pipeline_class() &&
+ !n->is_SpillCopy() ) // Breakpoints, Prolog, etc
+ continue;
+ break; // Funny loop structure to be sure...
+ }
+ // Compute last "interesting" instruction in block - last instruction we
+ // might schedule. _bb_end points just after last schedulable inst. We
+ // normally schedule conditional branches (despite them being forced last
+ // in the block), because they have delay slots we can fill. Calls all
+ // have their delay slots filled in the template expansions, so we don't
+ // bother scheduling them.
+ Node *last = bb->_nodes[_bb_end];
+ if( last->is_Catch() ||
+ (last->is_Mach() && last->as_Mach()->ideal_Opcode() == Op_Halt) ) {
+ // There must be a prior call. Skip it.
+ while( !bb->_nodes[--_bb_end]->is_Call() ) {
+ assert( bb->_nodes[_bb_end]->is_Proj(), "skipping projections after expected call" );
+ }
+ } else if( last->is_MachNullCheck() ) {
+ // Backup so the last null-checked memory instruction is
+ // outside the schedulable range. Skip over the nullcheck,
+ // projection, and the memory nodes.
+ Node *mem = last->in(1);
+ do {
+ _bb_end--;
+ } while (mem != bb->_nodes[_bb_end]);
+ } else {
+ // Set _bb_end to point after last schedulable inst.
+ _bb_end++;
+ }
+
+ assert( _bb_start <= _bb_end, "inverted block ends" );
+
+ // Compute the register antidependencies for the basic block
+ ComputeRegisterAntidependencies(bb);
+ if (_cfg->C->failing()) return; // too many D-U pinch points
+
+ // Compute intra-bb latencies for the nodes
+ ComputeLocalLatenciesForward(bb);
+
+ // Compute the usage within the block, and set the list of all nodes
+ // in the block that have no uses within the block.
+ ComputeUseCount(bb);
+
+ // Schedule the remaining instructions in the block
+ while ( _available.size() > 0 ) {
+ Node *n = ChooseNodeToBundle();
+ AddNodeToBundle(n,bb);
+ }
+
+ assert( _scheduled.size() == _bb_end - _bb_start, "wrong number of instructions" );
+#ifdef ASSERT
+ for( uint l = _bb_start; l < _bb_end; l++ ) {
+ Node *n = bb->_nodes[l];
+ uint m;
+ for( m = 0; m < _bb_end-_bb_start; m++ )
+ if( _scheduled[m] == n )
+ break;
+ assert( m < _bb_end-_bb_start, "instruction missing in schedule" );
+ }
+#endif
+
+ // Now copy the instructions (in reverse order) back to the block
+ for ( uint k = _bb_start; k < _bb_end; k++ )
+ bb->_nodes.map(k, _scheduled[_bb_end-k-1]);
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ tty->print("# Schedule BB#%03d (final)\n", i);
+ uint current = 0;
+ for (uint j = 0; j < bb->_nodes.size(); j++) {
+ Node *n = bb->_nodes[j];
+ if( valid_bundle_info(n) ) {
+ Bundle *bundle = node_bundling(n);
+ if (bundle->instr_count() > 0 || bundle->flags() > 0) {
+ tty->print("*** Bundle: ");
+ bundle->dump();
+ }
+ n->dump();
+ }
+ }
+ }
+#endif
+#ifdef ASSERT
+ verify_good_schedule(bb,"after block local scheduling");
+#endif
+ }
+
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output())
+ tty->print("# <- DoScheduling\n");
+#endif
+
+ // Record final node-bundling array location
+ _regalloc->C->set_node_bundling_base(_node_bundling_base);
+
+} // end DoScheduling
+
+//------------------------------verify_good_schedule---------------------------
+// Verify that no live-range used in the block is killed in the block by a
+// wrong DEF. This doesn't verify live-ranges that span blocks.
+
+// Check for edge existence. Used to avoid adding redundant precedence edges.
+static bool edge_from_to( Node *from, Node *to ) {
+ for( uint i=0; i<from->len(); i++ )
+ if( from->in(i) == to )
+ return true;
+ return false;
+}
+
+#ifdef ASSERT
+//------------------------------verify_do_def----------------------------------
+void Scheduling::verify_do_def( Node *n, OptoReg::Name def, const char *msg ) {
+ // Check for bad kills
+ if( OptoReg::is_valid(def) ) { // Ignore stores & control flow
+ Node *prior_use = _reg_node[def];
+ if( prior_use && !edge_from_to(prior_use,n) ) {
+ tty->print("%s = ",OptoReg::as_VMReg(def)->name());
+ n->dump();
+ tty->print_cr("...");
+ prior_use->dump();
+ assert_msg(edge_from_to(prior_use,n),msg);
+ }
+ _reg_node.map(def,NULL); // Kill live USEs
+ }
+}
+
+//------------------------------verify_good_schedule---------------------------
+void Scheduling::verify_good_schedule( Block *b, const char *msg ) {
+
+ // Zap to something reasonable for the verify code
+ _reg_node.clear();
+
+ // Walk over the block backwards. Check to make sure each DEF doesn't
+ // kill a live value (other than the one it's supposed to). Add each
+ // USE to the live set.
+ for( uint i = b->_nodes.size()-1; i >= _bb_start; i-- ) {
+ Node *n = b->_nodes[i];
+ int n_op = n->Opcode();
+ if( n_op == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
+ // Fat-proj kills a slew of registers
+ RegMask rm = n->out_RegMask();// Make local copy
+ while( rm.is_NotEmpty() ) {
+ OptoReg::Name kill = rm.find_first_elem();
+ rm.Remove(kill);
+ verify_do_def( n, kill, msg );
+ }
+ } else if( n_op != Op_Node ) { // Avoid brand new antidependence nodes
+ // Get DEF'd registers the normal way
+ verify_do_def( n, _regalloc->get_reg_first(n), msg );
+ verify_do_def( n, _regalloc->get_reg_second(n), msg );
+ }
+
+ // Now make all USEs live
+ for( uint i=1; i<n->req(); i++ ) {
+ Node *def = n->in(i);
+ assert(def != 0, "input edge required");
+ OptoReg::Name reg_lo = _regalloc->get_reg_first(def);
+ OptoReg::Name reg_hi = _regalloc->get_reg_second(def);
+ if( OptoReg::is_valid(reg_lo) ) {
+ assert_msg(!_reg_node[reg_lo] || edge_from_to(_reg_node[reg_lo],def), msg );
+ _reg_node.map(reg_lo,n);
+ }
+ if( OptoReg::is_valid(reg_hi) ) {
+ assert_msg(!_reg_node[reg_hi] || edge_from_to(_reg_node[reg_hi],def), msg );
+ _reg_node.map(reg_hi,n);
+ }
+ }
+
+ }
+
+ // Zap to something reasonable for the Antidependence code
+ _reg_node.clear();
+}
+#endif
+
+// Conditionally add precedence edges. Avoid putting edges on Projs.
+static void add_prec_edge_from_to( Node *from, Node *to ) {
+ if( from->is_Proj() ) { // Put precedence edge on Proj's input
+ assert( from->req() == 1 && (from->len() == 1 || from->in(1)==0), "no precedence edges on projections" );
+ from = from->in(0);
+ }
+ if( from != to && // No cycles (for things like LD L0,[L0+4] )
+ !edge_from_to( from, to ) ) // Avoid duplicate edge
+ from->add_prec(to);
+}
+
+//------------------------------anti_do_def------------------------------------
+void Scheduling::anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def ) {
+ if( !OptoReg::is_valid(def_reg) ) // Ignore stores & control flow
+ return;
+
+ Node *pinch = _reg_node[def_reg]; // Get pinch point
+ if( !pinch || _bbs[pinch->_idx] != b || // No pinch-point yet?
+ is_def ) { // Check for a true def (not a kill)
+ _reg_node.map(def_reg,def); // Record def/kill as the optimistic pinch-point
+ return;
+ }
+
+ Node *kill = def; // Rename 'def' to more descriptive 'kill'
+ debug_only( def = (Node*)0xdeadbeef; )
+
+ // After some number of kills there _may_ be a later def
+ Node *later_def = NULL;
+
+ // Finding a kill requires a real pinch-point.
+ // Check for not already having a pinch-point.
+ // Pinch points are Op_Node's.
+ if( pinch->Opcode() != Op_Node ) { // Or later-def/kill as pinch-point?
+ later_def = pinch; // Must be def/kill as optimistic pinch-point
+ if ( _pinch_free_list.size() > 0) {
+ pinch = _pinch_free_list.pop();
+ } else {
+ pinch = new (_cfg->C, 1) Node(1); // Pinch point to-be
+ }
+ if (pinch->_idx >= _regalloc->node_regs_max_index()) {
+ _cfg->C->record_method_not_compilable("too many D-U pinch points");
+ return;
+ }
+ _bbs.map(pinch->_idx,b); // Pretend it's valid in this block (lazy init)
+ _reg_node.map(def_reg,pinch); // Record pinch-point
+ //_regalloc->set_bad(pinch->_idx); // Already initialized this way.
+ if( later_def->outcnt() == 0 || later_def->ideal_reg() == MachProjNode::fat_proj ) { // Distinguish def from kill
+ pinch->init_req(0, _cfg->C->top()); // set not NULL for the next call
+ add_prec_edge_from_to(later_def,pinch); // Add edge from kill to pinch
+ later_def = NULL; // and no later def
+ }
+ pinch->set_req(0,later_def); // Hook later def so we can find it
+ } else { // Else have valid pinch point
+ if( pinch->in(0) ) // If there is a later-def
+ later_def = pinch->in(0); // Get it
+ }
+
+ // Add output-dependence edge from later def to kill
+ if( later_def ) // If there is some original def
+ add_prec_edge_from_to(later_def,kill); // Add edge from def to kill
+
+ // See if current kill is also a use, and so is forced to be the pinch-point.
+ if( pinch->Opcode() == Op_Node ) {
+ Node *uses = kill->is_Proj() ? kill->in(0) : kill;
+ for( uint i=1; i<uses->req(); i++ ) {
+ if( _regalloc->get_reg_first(uses->in(i)) == def_reg ||
+ _regalloc->get_reg_second(uses->in(i)) == def_reg ) {
+ // Yes, found a use/kill pinch-point
+ pinch->set_req(0,NULL); //
+ pinch->replace_by(kill); // Move anti-dep edges up
+ pinch = kill;
+ _reg_node.map(def_reg,pinch);
+ return;
+ }
+ }
+ }
+
+ // Add edge from kill to pinch-point
+ add_prec_edge_from_to(kill,pinch);
+}
+
+//------------------------------anti_do_use------------------------------------
+void Scheduling::anti_do_use( Block *b, Node *use, OptoReg::Name use_reg ) {
+ if( !OptoReg::is_valid(use_reg) ) // Ignore stores & control flow
+ return;
+ Node *pinch = _reg_node[use_reg]; // Get pinch point
+ // Check for no later def_reg/kill in block
+ if( pinch && _bbs[pinch->_idx] == b &&
+ // Use has to be block-local as well
+ _bbs[use->_idx] == b ) {
+ if( pinch->Opcode() == Op_Node && // Real pinch-point (not optimistic?)
+ pinch->req() == 1 ) { // pinch not yet in block?
+ pinch->del_req(0); // yank pointer to later-def, also set flag
+ // Insert the pinch-point in the block just after the last use
+ b->_nodes.insert(b->find_node(use)+1,pinch);
+ _bb_end++; // Increase size scheduled region in block
+ }
+
+ add_prec_edge_from_to(pinch,use);
+ }
+}
+
+//------------------------------ComputeRegisterAntidependences-----------------
+// We insert antidependences between the reads and following write of
+// allocated registers to prevent illegal code motion. Hopefully, the
+// number of added references should be fairly small, especially as we
+// are only adding references within the current basic block.
+void Scheduling::ComputeRegisterAntidependencies(Block *b) {
+
+#ifdef ASSERT
+ verify_good_schedule(b,"before block local scheduling");
+#endif
+
+ // A valid schedule, for each register independently, is an endless cycle
+ // of: a def, then some uses (connected to the def by true dependencies),
+ // then some kills (defs with no uses), finally the cycle repeats with a new
+ // def. The uses are allowed to float relative to each other, as are the
+ // kills. No use is allowed to slide past a kill (or def). This requires
+ // antidependencies between all uses of a single def and all kills that
+ // follow, up to the next def. More edges are redundant, because later defs
+ // & kills are already serialized with true or antidependencies. To keep
+ // the edge count down, we add a 'pinch point' node if there's more than
+ // one use or more than one kill/def.
+
+ // We add dependencies in one bottom-up pass.
+
+ // For each instruction we handle its DEFs/KILLs, then its USEs.
+
+ // For each DEF/KILL, we check to see if there's a prior DEF/KILL for this
+ // register. If not, we record the DEF/KILL in _reg_node, the
+ // register-to-def mapping. If there is a prior DEF/KILL, we insert a
+ // "pinch point", a new Node that's in the graph but not in the block.
+ // We put edges from the prior and current DEF/KILLs to the pinch point.
+ // We put the pinch point in _reg_node. If there's already a pinch point
+ // we merely add an edge from the current DEF/KILL to the pinch point.
+
+ // After doing the DEF/KILLs, we handle USEs. For each used register, we
+ // put an edge from the pinch point to the USE.
+
+ // To be expedient, the _reg_node array is pre-allocated for the whole
+ // compilation. _reg_node is lazily initialized; it either contains a NULL,
+ // or a valid def/kill/pinch-point, or a leftover node from some prior
+ // block. Leftover node from some prior block is treated like a NULL (no
+ // prior def, so no anti-dependence needed). Valid def is distinguished by
+ // it being in the current block.
+ bool fat_proj_seen = false;
+ uint last_safept = _bb_end-1;
+ Node* end_node = (_bb_end-1 >= _bb_start) ? b->_nodes[last_safept] : NULL;
+ Node* last_safept_node = end_node;
+ for( uint i = _bb_end-1; i >= _bb_start; i-- ) {
+ Node *n = b->_nodes[i];
+ int is_def = n->outcnt(); // def if some uses prior to adding precedence edges
+ if( n->Opcode() == Op_MachProj && n->ideal_reg() == MachProjNode::fat_proj ) {
+ // Fat-proj kills a slew of registers
+ // This can add edges to 'n' and obscure whether or not it was a def,
+ // hence the is_def flag.
+ fat_proj_seen = true;
+ RegMask rm = n->out_RegMask();// Make local copy
+ while( rm.is_NotEmpty() ) {
+ OptoReg::Name kill = rm.find_first_elem();
+ rm.Remove(kill);
+ anti_do_def( b, n, kill, is_def );
+ }
+ } else {
+ // Get DEF'd registers the normal way
+ anti_do_def( b, n, _regalloc->get_reg_first(n), is_def );
+ anti_do_def( b, n, _regalloc->get_reg_second(n), is_def );
+ }
+
+ // Check each register used by this instruction for a following DEF/KILL
+ // that must occur afterward and requires an anti-dependence edge.
+ for( uint j=0; j<n->req(); j++ ) {
+ Node *def = n->in(j);
+ if( def ) {
+ assert( def->Opcode() != Op_MachProj || def->ideal_reg() != MachProjNode::fat_proj, "" );
+ anti_do_use( b, n, _regalloc->get_reg_first(def) );
+ anti_do_use( b, n, _regalloc->get_reg_second(def) );
+ }
+ }
+ // Do not allow defs of new derived values to float above GC
+ // points unless the base is definitely available at the GC point.
+
+ Node *m = b->_nodes[i];
+
+ // Add precedence edge from following safepoint to use of derived pointer
+ if( last_safept_node != end_node &&
+ m != last_safept_node) {
+ for (uint k = 1; k < m->req(); k++) {
+ const Type *t = m->in(k)->bottom_type();
+ if( t->isa_oop_ptr() &&
+ t->is_ptr()->offset() != 0 ) {
+ last_safept_node->add_prec( m );
+ break;
+ }
+ }
+ }
+
+ if( n->jvms() ) { // Precedence edge from derived to safept
+ // Check if last_safept_node was moved by pinch-point insertion in anti_do_use()
+ if( b->_nodes[last_safept] != last_safept_node ) {
+ last_safept = b->find_node(last_safept_node);
+ }
+ for( uint j=last_safept; j > i; j-- ) {
+ Node *mach = b->_nodes[j];
+ if( mach->is_Mach() && mach->as_Mach()->ideal_Opcode() == Op_AddP )
+ mach->add_prec( n );
+ }
+ last_safept = i;
+ last_safept_node = m;
+ }
+ }
+
+ if (fat_proj_seen) {
+ // Garbage collect pinch nodes that were not consumed.
+ // They are usually created by a fat kill MachProj for a call.
+ garbage_collect_pinch_nodes();
+ }
+}
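
The comment blocks above justify pinch points purely by edge count: wiring every use of a def directly to every following kill of the same register needs a quadratic number of anti-dependence edges, while routing both sets through one pinch node needs only a linear number. A minimal standalone sketch of that arithmetic (plain C++ for illustration; not HotSpot code, and the function names are made up):

#include <cstddef>
#include <iostream>

// For one register in one block: `uses` uses of the current def are followed
// by `kills` later defs/kills that must not be scheduled above them.
static std::size_t edges_direct(std::size_t uses, std::size_t kills) {
  return uses * kills;            // one anti-dependence edge per (use, kill) pair
}

static std::size_t edges_with_pinch(std::size_t uses, std::size_t kills) {
  return uses + kills;            // every use and every kill attaches to one pinch node
}

int main() {
  const std::size_t sizes[] = {2, 5, 20};
  for (std::size_t n : sizes) {
    std::cout << n << " uses, " << n << " kills: "
              << edges_direct(n, n) << " direct edges vs "
              << edges_with_pinch(n, n) << " through a pinch node\n";
  }
  return 0;
}
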
+
+//------------------------------garbage_collect_pinch_nodes-------------------------------
+
+// Garbage collect pinch nodes for reuse by other blocks.
+//
+// The block scheduler's insertion of anti-dependence
+// edges creates many pinch nodes when the block contains
+// 2 or more Calls. A pinch node is used to prevent a
+// combinatorial explosion of edges. If a set of kills for a
+// register is anti-dependent on a set of uses (or defs), rather
+// than adding an edge in the graph between each pair of kill
+// and use (or def), a pinch is inserted between them:
+//
+// use1 use2 use3
+// \ | /
+// \ | /
+// pinch
+// / | \
+// / | \
+// kill1 kill2 kill3
+//
+// One pinch node is created per register killed when
+// the second call is encountered during a backwards pass
+// over the block. Most of these pinch nodes are never
+// wired into the graph because the register is never
+// used or def'ed in the block.
+//
+void Scheduling::garbage_collect_pinch_nodes() {
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) tty->print("Reclaimed pinch nodes:");
+#endif
+ int trace_cnt = 0;
+ for (uint k = 0; k < _reg_node.Size(); k++) {
+ Node* pinch = _reg_node[k];
+ if (pinch != NULL && pinch->Opcode() == Op_Node &&
+ // no precedence input edges
+ (pinch->req() == pinch->len() || pinch->in(pinch->req()) == NULL) ) {
+ cleanup_pinch(pinch);
+ _pinch_free_list.push(pinch);
+ _reg_node.map(k, NULL);
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) {
+ trace_cnt++;
+ if (trace_cnt > 40) {
+ tty->print("\n");
+ trace_cnt = 0;
+ }
+ tty->print(" %d", pinch->_idx);
+ }
+#endif
+ }
+ }
+#ifndef PRODUCT
+ if (_cfg->C->trace_opto_output()) tty->print("\n");
+#endif
+}
+
+// Clean up a pinch node for reuse.
+void Scheduling::cleanup_pinch( Node *pinch ) {
+ assert (pinch && pinch->Opcode() == Op_Node && pinch->req() == 1, "just checking");
+
+ for (DUIterator_Last imin, i = pinch->last_outs(imin); i >= imin; ) {
+ Node* use = pinch->last_out(i);
+ uint uses_found = 0;
+ for (uint j = use->req(); j < use->len(); j++) {
+ if (use->in(j) == pinch) {
+ use->rm_prec(j);
+ uses_found++;
+ }
+ }
+ assert(uses_found > 0, "must be a precedence edge");
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+ // May have a later_def entry
+ pinch->set_req(0, NULL);
+}
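
The reverse walk in cleanup_pinch() has one subtle point: removing a precedence edge also shrinks the pinch node's out list, so the cursor must step back by the number of copies just deleted. A standalone analog of that pattern (plain C++; the vector of integer ids is a stand-in for the DU iterator and Node machinery, which are not reproduced here):

#include <iostream>
#include <vector>

int main() {
  // `outs` models the pinch node's out-edge list; a user id appears once per
  // precedence edge it has to the pinch node.
  std::vector<int> outs = {7, 3, 7, 9, 3, 3};
  for (int i = static_cast<int>(outs.size()) - 1; i >= 0; ) {
    int user = outs[i];
    int erased = 0;
    // Delete every copy of this user's edge, scanning from the back.
    for (int j = static_cast<int>(outs.size()) - 1; j >= 0; --j) {
      if (outs[j] == user) { outs.erase(outs.begin() + j); ++erased; }
    }
    i -= erased;   // mirrors "i -= uses_found": skip nothing, revisit nothing
  }
  std::cout << "edges left: " << outs.size() << "\n";   // prints 0
  return 0;
}
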
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+
+void Scheduling::dump_available() const {
+ tty->print("#Availist ");
+ for (uint i = 0; i < _available.size(); i++)
+ tty->print(" N%d/l%d", _available[i]->_idx,_current_latency[_available[i]->_idx]);
+ tty->cr();
+}
+
+// Print Scheduling Statistics
+void Scheduling::print_statistics() {
+ // Print the size added by nops for bundling
+ tty->print("Nops added %d bytes to total of %d bytes",
+ _total_nop_size, _total_method_size);
+ if (_total_method_size > 0)
+ tty->print(", for %.2f%%",
+ ((double)_total_nop_size) / ((double) _total_method_size) * 100.0);
+ tty->print("\n");
+
+ // Print the number of branch shadows filled
+ if (Pipeline::_branch_has_delay_slot) {
+ tty->print("Of %d branches, %d had unconditional delay slots filled",
+ _total_branches, _total_unconditional_delays);
+ if (_total_branches > 0)
+ tty->print(", for %.2f%%",
+ ((double)_total_unconditional_delays) / ((double)_total_branches) * 100.0);
+ tty->print("\n");
+ }
+
+ uint total_instructions = 0, total_bundles = 0;
+
+ for (uint i = 1; i <= Pipeline::_max_instrs_per_cycle; i++) {
+ uint bundle_count = _total_instructions_per_bundle[i];
+ total_instructions += bundle_count * i;
+ total_bundles += bundle_count;
+ }
+
+ if (total_bundles > 0)
+ tty->print("Average ILP (excluding nops) is %.2f\n",
+ ((double)total_instructions) / ((double)total_bundles));
+}
+#endif
diff --git a/src/share/vm/opto/output.hpp b/src/share/vm/opto/output.hpp
new file mode 100644
index 000000000..386e2be16
--- /dev/null
+++ b/src/share/vm/opto/output.hpp
@@ -0,0 +1,215 @@
+/*
+ * Copyright 2000-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Arena;
+class Bundle;
+class Block;
+class Block_Array;
+class Node;
+class Node_Array;
+class Node_List;
+class PhaseCFG;
+class PhaseChaitin;
+class Pipeline_Use_Element;
+class Pipeline_Use;
+
+#ifndef PRODUCT
+#define DEBUG_ARG(x) , x
+#else
+#define DEBUG_ARG(x)
+#endif
+
+// Define the initial sizes for allocation of the resizable code buffer
+enum {
+ initial_code_capacity = 16 * 1024,
+ initial_stub_capacity = 4 * 1024,
+ initial_const_capacity = 4 * 1024,
+ initial_locs_capacity = 3 * 1024
+};
+
+//------------------------------Scheduling----------------------------------
+// This class contains all the information necessary to implement instruction
+// scheduling and bundling.
+class Scheduling {
+
+private:
+ // Arena to use
+ Arena *_arena;
+
+ // Control-Flow Graph info
+ PhaseCFG *_cfg;
+
+ // Register Allocation info
+ PhaseRegAlloc *_regalloc;
+
+ // Number of nodes in the method
+ uint _node_bundling_limit;
+
+ // List of scheduled nodes. Generated in reverse order
+ Node_List _scheduled;
+
+ // List of nodes currently available for choosing for scheduling
+ Node_List _available;
+
+ // Mapping from node (index) to basic block
+ Block_Array& _bbs;
+
+ // For each instruction beginning a bundle, the number of following
+ // nodes to be bundled with it.
+ Bundle *_node_bundling_base;
+
+ // Mapping from register to Node
+ Node_List _reg_node;
+
+ // Free list for pinch nodes.
+ Node_List _pinch_free_list;
+
+ // Latency from the beginning of the containing basic block (base 1)
+ // for each node.
+ unsigned short *_node_latency;
+
+ // Number of uses of this node within the containing basic block.
+ short *_uses;
+
+ // Schedulable portion of current block. Skips Region/Phi/CreateEx up
+ // front, branch+proj at end. Also skips Catch/CProj (same as
+ // branch-at-end), plus just-prior exception-throwing call.
+ uint _bb_start, _bb_end;
+
+ // Latency from the end of the basic block as scheduled
+ unsigned short *_current_latency;
+
+ // Remember the next node
+ Node *_next_node;
+
+ // Use this for an unconditional branch delay slot
+ Node *_unconditional_delay_slot;
+
+ // Pointer to a Nop
+ MachNopNode *_nop;
+
+ // Length of the current bundle, in instructions
+ uint _bundle_instr_count;
+
+ // Current Cycle number, for computing latencies and bundling
+ uint _bundle_cycle_number;
+
+ // Bundle information
+ Pipeline_Use_Element _bundle_use_elements[resource_count];
+ Pipeline_Use _bundle_use;
+
+ // Dump the available list
+ void dump_available() const;
+
+public:
+ Scheduling(Arena *arena, Compile &compile);
+
+ // Destructor
+ NOT_PRODUCT( ~Scheduling(); )
+
+ // Step ahead "i" cycles
+ void step(uint i);
+
+ // Step ahead 1 cycle, and clear the bundle state (for example,
+ // at a branch target)
+ void step_and_clear();
+
+ Bundle* node_bundling(const Node *n) {
+ assert(valid_bundle_info(n), "oob");
+ return (&_node_bundling_base[n->_idx]);
+ }
+
+ bool valid_bundle_info(const Node *n) const {
+ return (_node_bundling_limit > n->_idx);
+ }
+
+ bool starts_bundle(const Node *n) const {
+ return (_node_bundling_limit > n->_idx && _node_bundling_base[n->_idx].starts_bundle());
+ }
+
+ // Do the scheduling
+ void DoScheduling();
+
+ // Compute the local latencies walking forward over the list of
+ // nodes for a basic block
+ void ComputeLocalLatenciesForward(const Block *bb);
+
+ // Compute the register antidependencies within a basic block
+ void ComputeRegisterAntidependencies(Block *bb);
+ void verify_do_def( Node *n, OptoReg::Name def, const char *msg );
+ void verify_good_schedule( Block *b, const char *msg );
+ void anti_do_def( Block *b, Node *def, OptoReg::Name def_reg, int is_def );
+ void anti_do_use( Block *b, Node *use, OptoReg::Name use_reg );
+
+ // Add a node to the current bundle
+ void AddNodeToBundle(Node *n, const Block *bb);
+
+ // Add a node to the list of available nodes
+ void AddNodeToAvailableList(Node *n);
+
+ // Compute the local use count for the nodes in a block, and compute
+ // the list of instructions with no uses in the block as available
+ void ComputeUseCount(const Block *bb);
+
+ // Choose an instruction from the available list to add to the bundle
+ Node * ChooseNodeToBundle();
+
+ // See if this Node fits into the currently accumulating bundle
+ bool NodeFitsInBundle(Node *n);
+
+ // Decrement the use count for a node
+ void DecrementUseCounts(Node *n, const Block *bb);
+
+ // Garbage collect pinch nodes for reuse by other blocks.
+ void garbage_collect_pinch_nodes();
+ // Clean up a pinch node for reuse (helper for above).
+ void cleanup_pinch( Node *pinch );
+
+ // Information for statistics gathering
+#ifndef PRODUCT
+private:
+ // Gather information on size of nops relative to total
+ uint _branches, _unconditional_delays;
+
+ static uint _total_nop_size, _total_method_size;
+ static uint _total_branches, _total_unconditional_delays;
+ static uint _total_instructions_per_bundle[Pipeline::_max_instrs_per_cycle+1];
+
+public:
+ static void print_statistics();
+
+ static void increment_instructions_per_bundle(uint i) {
+ _total_instructions_per_bundle[i]++;
+ }
+
+ static void increment_nop_size(uint s) {
+ _total_nop_size += s;
+ }
+
+ static void increment_method_size(uint s) {
+ _total_method_size += s;
+ }
+#endif
+
+};
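
For reference, the three bundling queries declared above are what the rest of the output phase is expected to call per node. A hypothetical usage fragment (the names `sched` and `n` are assumptions, standing for a Scheduling object and a Node* available in the surrounding compile; this is a sketch, not standalone code):

// Ask whether `n` has bundling info and whether it opens a new issue bundle.
// valid_bundle_info() guards node_bundling(), which asserts when the node's
// index is outside the recorded limit.
if (sched.valid_bundle_info(n)) {
  Bundle* bundle = sched.node_bundling(n);
  if (sched.starts_bundle(n)) {
    // `n` begins a new bundle; `bundle` describes how the group was packed.
  }
}
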
diff --git a/src/share/vm/opto/parse.hpp b/src/share/vm/opto/parse.hpp
new file mode 100644
index 000000000..60ffdf17d
--- /dev/null
+++ b/src/share/vm/opto/parse.hpp
@@ -0,0 +1,555 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class BytecodeParseHistogram;
+class InlineTree;
+class Parse;
+class SwitchRange;
+
+
+//------------------------------InlineTree-------------------------------------
+class InlineTree : public ResourceObj {
+ Compile* C; // cache
+ JVMState* _caller_jvms; // state of caller
+ ciMethod* _method; // method being called by the caller_jvms
+ InlineTree* _caller_tree;
+ uint _count_inline_bcs; // Accumulated count of inlined bytecodes
+ // Call-site count / interpreter invocation count, scaled recursively.
+ // Always between 0.0 and 1.0. Represents the fraction of the method's
+ // total execution time used at this call site.
+ const float _site_invoke_ratio;
+ float compute_callee_frequency( int caller_bci ) const;
+
+ GrowableArray<InlineTree*> _subtrees;
+ friend class Compile;
+
+protected:
+ InlineTree(Compile* C,
+ const InlineTree* caller_tree,
+ ciMethod* callee_method,
+ JVMState* caller_jvms,
+ int caller_bci,
+ float site_invoke_ratio);
+ InlineTree *build_inline_tree_for_callee(ciMethod* callee_method,
+ JVMState* caller_jvms,
+ int caller_bci);
+ const char* try_to_inline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result);
+ const char* shouldInline(ciMethod* callee_method, int caller_bci, ciCallProfile& profile, WarmCallInfo* wci_result) const;
+ const char* shouldNotInline(ciMethod* callee_method, WarmCallInfo* wci_result) const;
+ void print_inlining(ciMethod *callee_method, int caller_bci, const char *failure_msg) const PRODUCT_RETURN;
+
+ InlineTree *caller_tree() const { return _caller_tree; }
+ InlineTree* callee_at(int bci, ciMethod* m) const;
+ int inline_depth() const { return _caller_jvms ? _caller_jvms->depth() : 0; }
+
+public:
+ static InlineTree* build_inline_tree_root();
+ static InlineTree* find_subtree_from_root(InlineTree* root, JVMState* jvms, ciMethod* callee, bool create_if_not_found = false);
+
+ // For temporary (stack-allocated, stateless) ilts:
+ InlineTree(Compile* c, ciMethod* callee_method, JVMState* caller_jvms, float site_invoke_ratio);
+
+ // InlineTree enum
+ enum InlineStyle {
+ Inline_do_not_inline = 0, //
+ Inline_cha_is_monomorphic = 1, //
+ Inline_type_profile_monomorphic = 2 //
+ };
+
+ // See if it is OK to inline.
+ // The receiver is the inline tree for the caller.
+ //
+ // The result is a temperature indication. If it is hot or cold,
+ // inlining is immediate or undesirable. Otherwise, the info block
+ // returned is newly allocated and may be enqueued.
+ //
+ // If the method is inlinable, a new inline subtree is created on the fly,
+ // and may be accessed by find_subtree_from_root.
+ // The call_method is the dest_method for a special or static invocation.
+ // The call_method is an optimized virtual method candidate otherwise.
+ WarmCallInfo* ok_to_inline(ciMethod *call_method, JVMState* caller_jvms, ciCallProfile& profile, WarmCallInfo* wci);
+
+ // Information about inlined method
+ JVMState* caller_jvms() const { return _caller_jvms; }
+ ciMethod *method() const { return _method; }
+ int caller_bci() const { return _caller_jvms ? _caller_jvms->bci() : InvocationEntryBci; }
+ uint count_inline_bcs() const { return _count_inline_bcs; }
+ float site_invoke_ratio() const { return _site_invoke_ratio; };
+
+#ifndef PRODUCT
+private:
+ uint _count_inlines; // Count of inlined methods
+public:
+ // Debug information collected during parse
+ uint count_inlines() const { return _count_inlines; };
+#endif
+ GrowableArray<InlineTree*> subtrees() { return _subtrees; }
+};
+
+
+//-----------------------------------------------------------------------------
+//------------------------------Parse------------------------------------------
+// Parse bytecodes, build a Graph
+class Parse : public GraphKit {
+ public:
+ // Per-block information needed by the parser:
+ class Block {
+ private:
+ ciTypeFlow::Block* _flow;
+ int _pred_count; // how many predecessors in CFG?
+ int _preds_parsed; // how many of these have been parsed?
+ uint _count; // how many times executed? Currently only set by _goto's
+ bool _is_parsed; // has this block been parsed yet?
+ bool _is_handler; // is this block an exception handler?
+ SafePointNode* _start_map; // all values flowing into this block
+ MethodLivenessResult _live_locals; // lazily initialized liveness bitmap
+
+ int _num_successors; // Includes only normal control flow.
+ int _all_successors; // Include exception paths also.
+ Block** _successors;
+
+ // Use init_node/init_graph to initialize Blocks.
+ // Block() : _live_locals((uintptr_t*)NULL,0) { ShouldNotReachHere(); }
+ Block() : _live_locals(NULL,0) { ShouldNotReachHere(); }
+
+ public:
+
+ // Set up the block data structure itself.
+ void init_node(Parse* outer, int po);
+ // Set up the block's relations to other blocks.
+ void init_graph(Parse* outer);
+
+ ciTypeFlow::Block* flow() const { return _flow; }
+ int pred_count() const { return _pred_count; }
+ int preds_parsed() const { return _preds_parsed; }
+ bool is_parsed() const { return _is_parsed; }
+ bool is_handler() const { return _is_handler; }
+ void set_count( uint x ) { _count = x; }
+ uint count() const { return _count; }
+
+ SafePointNode* start_map() const { assert(is_merged(),""); return _start_map; }
+ void set_start_map(SafePointNode* m) { assert(!is_merged(), ""); _start_map = m; }
+
+ // True after any predecessor flows control into this block
+ bool is_merged() const { return _start_map != NULL; }
+
+ // True when all non-exception predecessors have been parsed.
+ bool is_ready() const { return preds_parsed() == pred_count(); }
+
+ int num_successors() const { return _num_successors; }
+ int all_successors() const { return _all_successors; }
+ Block* successor_at(int i) const {
+ assert((uint)i < (uint)all_successors(), "");
+ return _successors[i];
+ }
+ Block* successor_for_bci(int bci);
+
+ int start() const { return flow()->start(); }
+ int limit() const { return flow()->limit(); }
+ int pre_order() const { return flow()->pre_order(); }
+ int start_sp() const { return flow()->stack_size(); }
+
+ const Type* peek(int off=0) const { return stack_type_at(start_sp() - (off+1)); }
+
+ const Type* stack_type_at(int i) const;
+ const Type* local_type_at(int i) const;
+ static const Type* get_type(ciType* t) { return Type::get_typeflow_type(t); }
+
+ bool has_trap_at(int bci) const { return flow()->has_trap() && flow()->trap_bci() == bci; }
+
+ // Call this just before parsing a block.
+ void mark_parsed() {
+ assert(!_is_parsed, "must parse each block exactly once");
+ _is_parsed = true;
+ }
+
+ // Return the phi/region input index for the "current" pred,
+ // and bump the pred number. For historical reasons these index
+ // numbers are handed out in descending order; for example, a block
+ // with three predecessors hands out path numbers 3, 2, 1 as its
+ // preds are parsed. The last index is always PhiNode::Input
+ // (i.e., 1). The value returned is known as a "path number"
+ // because it distinguishes by which path we are entering the block.
+ int next_path_num() {
+ assert(preds_parsed() < pred_count(), "too many preds?");
+ return pred_count() - _preds_parsed++;
+ }
+
+ // Add a previously unaccounted predecessor to this block.
+ // This operates by increasing the size of the block's region
+ // and all its phi nodes (if any). The value returned is a
+ // path number ("pnum").
+ int add_new_path();
+
+ // Initialize me by recording the parser's map. My own map must be NULL.
+ void record_state(Parse* outer);
+ };
+
+#ifndef PRODUCT
+ // BytecodeParseHistogram collects the number of bytecodes parsed, nodes constructed, and transformations.
+ class BytecodeParseHistogram : public ResourceObj {
+ private:
+ enum BPHType {
+ BPH_transforms,
+ BPH_values
+ };
+ static bool _initialized;
+ static uint _bytecodes_parsed [Bytecodes::number_of_codes];
+ static uint _nodes_constructed[Bytecodes::number_of_codes];
+ static uint _nodes_transformed[Bytecodes::number_of_codes];
+ static uint _new_values [Bytecodes::number_of_codes];
+
+ Bytecodes::Code _initial_bytecode;
+ int _initial_node_count;
+ int _initial_transforms;
+ int _initial_values;
+
+ Parse *_parser;
+ Compile *_compiler;
+
+ // Initialization
+ static void reset();
+
+ // Return info being collected, select with global flag 'BytecodeParseInfo'
+ int current_count(BPHType info_selector);
+
+ public:
+ BytecodeParseHistogram(Parse *p, Compile *c);
+ static bool initialized();
+
+ // Record info when starting to parse one bytecode
+ void set_initial_state( Bytecodes::Code bc );
+ // Record results of parsing one bytecode
+ void record_change();
+
+ // Profile printing
+ static void print(float cutoff = 0.01F); // cutoff in percent
+ };
+
+ public:
+ // Record work done during parsing
+ BytecodeParseHistogram* _parse_histogram;
+ void set_parse_histogram(BytecodeParseHistogram *bph) { _parse_histogram = bph; }
+ BytecodeParseHistogram* parse_histogram() { return _parse_histogram; }
+#endif
+
+ private:
+ friend class Block;
+
+ // Variables which characterize this compilation as a whole:
+
+ JVMState* _caller; // JVMS which carries incoming args & state.
+ float _expected_uses; // expected number of calls to this code
+ float _prof_factor; // discount applied to my profile counts
+ int _depth; // Inline tree depth, for debug printouts
+ const TypeFunc*_tf; // My kind of function type
+ int _entry_bci; // the osr bci or InvocationEntryBci
+
+ ciTypeFlow* _flow; // Results of previous flow pass.
+ Block* _blocks; // Array of basic-block structs.
+ int _block_count; // Number of elements in _blocks.
+
+ GraphKit _exits; // Record all normal returns and throws here.
+ bool _wrote_final; // Did we write a final field?
+ bool _count_invocations; // update and test invocation counter
+ bool _method_data_update; // update method data oop
+
+ // Variables which track Java semantics during bytecode parsing:
+
+ Block* _block; // block currently getting parsed
+ ciBytecodeStream _iter; // stream of this method's bytecodes
+
+ int _blocks_merged; // Progress meter: state merges from BB preds
+ int _blocks_parsed; // Progress meter: BBs actually parsed
+
+ const FastLockNode* _synch_lock; // FastLockNode for synchronized method
+
+#ifndef PRODUCT
+ int _max_switch_depth; // Debugging SwitchRanges.
+ int _est_switch_depth; // Debugging SwitchRanges.
+#endif
+
+ public:
+ // Constructor
+ Parse(JVMState* caller, ciMethod* parse_method, float expected_uses);
+
+ virtual Parse* is_Parse() const { return (Parse*)this; }
+
+ public:
+ // Accessors.
+ JVMState* caller() const { return _caller; }
+ float expected_uses() const { return _expected_uses; }
+ float prof_factor() const { return _prof_factor; }
+ int depth() const { return _depth; }
+ const TypeFunc* tf() const { return _tf; }
+ // entry_bci() -- see osr_bci, etc.
+
+ ciTypeFlow* flow() const { return _flow; }
+ // blocks() -- see pre_order_at, start_block, etc.
+ int block_count() const { return _block_count; }
+
+ GraphKit& exits() { return _exits; }
+ bool wrote_final() const { return _wrote_final; }
+ void set_wrote_final(bool z) { _wrote_final = z; }
+ bool count_invocations() const { return _count_invocations; }
+ bool method_data_update() const { return _method_data_update; }
+
+ Block* block() const { return _block; }
+ ciBytecodeStream& iter() { return _iter; }
+ Bytecodes::Code bc() const { return _iter.cur_bc(); }
+
+ void set_block(Block* b) { _block = b; }
+
+ // Derived accessors:
+ bool is_normal_parse() const { return _entry_bci == InvocationEntryBci; }
+ bool is_osr_parse() const { return _entry_bci != InvocationEntryBci; }
+ int osr_bci() const { assert(is_osr_parse(),""); return _entry_bci; }
+
+ void set_parse_bci(int bci);
+
+ // Must this parse be aborted?
+ bool failing() { return C->failing(); }
+
+ Block* pre_order_at(int po) {
+ assert(0 <= po && po < _block_count, "oob");
+ return &_blocks[po];
+ }
+ Block* start_block() {
+ return pre_order_at(flow()->start_block()->pre_order());
+ }
+ // Can return NULL if the flow pass did not complete a block.
+ Block* successor_for_bci(int bci) {
+ return block()->successor_for_bci(bci);
+ }
+
+ private:
+ // Create a JVMS & map for the initial state of this method.
+ SafePointNode* create_entry_map();
+
+ // OSR helpers
+ Node *fetch_interpreter_state(int index, BasicType bt, Node *local_addrs, Node *local_addrs_base);
+ Node* check_interpreter_type(Node* l, const Type* type, SafePointNode* &bad_type_exit);
+ void load_interpreter_state(Node* osr_buf);
+
+ // Functions for managing basic blocks:
+ void init_blocks();
+ void load_state_from(Block* b);
+ void store_state_to(Block* b) { b->record_state(this); }
+
+ // Parse all the basic blocks.
+ void do_all_blocks();
+
+ // Helper for do_all_blocks; makes one pass in pre-order.
+ void visit_blocks();
+
+ // Parse the current basic block
+ void do_one_block();
+
+ // Raise an error if we get a bad ciTypeFlow CFG.
+ void handle_missing_successor(int bci);
+
+ // first actions (before BCI 0)
+ void do_method_entry();
+
+ // implementation of monitorenter/monitorexit
+ void do_monitor_enter();
+ void do_monitor_exit();
+
+ // Eagerly create phis throughout the state, to cope with back edges.
+ void ensure_phis_everywhere();
+
+ // Merge the current mapping into the basic block starting at bci
+ void merge( int target_bci);
+ // Same as plain merge, except that it allocates a new path number.
+ void merge_new_path( int target_bci);
+ // Merge the current mapping into an exception handler.
+ void merge_exception(int target_bci);
+ // Helper: Merge the current mapping into the given basic block
+ void merge_common(Block* target, int pnum);
+ // Helper functions for merging individual cells.
+ PhiNode *ensure_phi( int idx, bool nocreate = false);
+ PhiNode *ensure_memory_phi(int idx, bool nocreate = false);
+ // Helper to merge the current memory state into the given basic block
+ void merge_memory_edges(MergeMemNode* n, int pnum, bool nophi);
+
+ // Parse this bytecode, and alter the Parser's JVM->Node mapping
+ void do_one_bytecode();
+
+ // helper function to generate array store check
+ void array_store_check();
+ // Helper function to generate array load
+ void array_load(BasicType etype);
+ // Helper function to generate array store
+ void array_store(BasicType etype);
+ // Helper function to compute array addressing
+ Node* array_addressing(BasicType type, int vals, const Type* *result2=NULL);
+
+ // Pass current map to exits
+ void return_current(Node* value);
+
+ // Register finalizers on return from Object.<init>
+ void call_register_finalizer();
+
+ // Insert a compiler safepoint into the graph
+ void add_safepoint();
+
+ // Insert a compiler safepoint into the graph, if there is a back-branch.
+ void maybe_add_safepoint(int target_bci) {
+ if (UseLoopSafepoints && target_bci <= bci()) {
+ add_safepoint();
+ }
+ }
+
+ // Note: Intrinsic generation routines may be found in library_call.cpp.
+
+ // Helper function to setup Ideal Call nodes
+ void do_call();
+
+ // Helper function to uncommon-trap or bailout for non-compilable call-sites
+ bool can_not_compile_call_site(ciMethod *dest_method, ciInstanceKlass *klass);
+
+ // Helper function to identify inlining potential at call-site
+ ciMethod* optimize_inlining(ciMethod* caller, int bci, ciInstanceKlass* klass,
+ ciMethod *dest_method, const TypeOopPtr* receiver_type);
+
+ // Helper function to setup for type-profile based inlining
+ bool prepare_type_profile_inline(ciInstanceKlass* prof_klass, ciMethod* prof_method);
+
+ // Helper functions for type checking bytecodes:
+ void do_checkcast();
+ void do_instanceof();
+
+ // Helper functions for shifting & arithmetic
+ void modf();
+ void modd();
+ void l2f();
+
+ void do_irem();
+
+ // implementation of _get* and _put* bytecodes
+ void do_getstatic() { do_field_access(true, false); }
+ void do_getfield () { do_field_access(true, true); }
+ void do_putstatic() { do_field_access(false, false); }
+ void do_putfield () { do_field_access(false, true); }
+
+ // common code for making initial checks and forming addresses
+ void do_field_access(bool is_get, bool is_field);
+ bool static_field_ok_in_clinit(ciField *field, ciMethod *method);
+
+ // common code for actually performing the load or store
+ void do_get_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field);
+ void do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field);
+
+ // loading from a constant field or the constant pool
+ // returns false if push failed (non-perm field constants only, not ldcs)
+ bool push_constant(ciConstant con);
+
+ // implementation of object creation bytecodes
+ void do_new();
+ void do_newarray(BasicType elemtype);
+ void do_anewarray();
+ void do_multianewarray();
+ Node* expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, int ndimensions);
+
+ // implementation of jsr/ret
+ void do_jsr();
+ void do_ret();
+
+ float dynamic_branch_prediction(float &cnt);
+ float branch_prediction(float &cnt, BoolTest::mask btest, int target_bci);
+ bool seems_never_taken(float prob);
+
+ void do_ifnull(BoolTest::mask btest);
+ void do_if(BoolTest::mask btest, Node* c);
+ void repush_if_args();
+ void adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
+ Block* path, Block* other_path);
+ IfNode* jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask);
+ Node* jump_if_join(Node* iffalse, Node* iftrue);
+ void jump_if_true_fork(IfNode *ifNode, int dest_bci_if_true, int prof_table_index);
+ void jump_if_false_fork(IfNode *ifNode, int dest_bci_if_false, int prof_table_index);
+ void jump_if_always_fork(int dest_bci_if_true, int prof_table_index);
+
+ friend class SwitchRange;
+ void do_tableswitch();
+ void do_lookupswitch();
+ void jump_switch_ranges(Node* a, SwitchRange* lo, SwitchRange* hi, int depth = 0);
+ bool create_jump_tables(Node* a, SwitchRange* lo, SwitchRange* hi);
+
+ // helper functions for methodData style profiling
+ void test_counter_against_threshold(Node* cnt, int limit);
+ void increment_and_test_invocation_counter(int limit);
+ void test_for_osr_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, int limit);
+ Node* method_data_addressing(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
+ void increment_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize offset, Node* idx = NULL, uint stride = 0);
+ void set_md_flag_at(ciMethodData* md, ciProfileData* data, int flag_constant);
+
+ void profile_method_entry();
+ void profile_taken_branch(int target_bci, bool force_update = false);
+ void profile_not_taken_branch(bool force_update = false);
+ void profile_call(Node* receiver);
+ void profile_generic_call();
+ void profile_receiver_type(Node* receiver);
+ void profile_ret(int target_bci);
+ void profile_null_checkcast();
+ void profile_switch_case(int table_index);
+
+ // helper function for call statistics
+ void count_compiled_calls(bool at_method_entry, bool is_inline) PRODUCT_RETURN;
+
+ Node_Notes* make_node_notes(Node_Notes* caller_nn);
+
+ // Helper functions for handling normal and abnormal exits.
+ void build_exits();
+
+ // Fix up all exceptional control flow exiting a single bytecode.
+ void do_exceptions();
+
+ // Fix up all exiting control flow at the end of the parse.
+ void do_exits();
+
+ // Add Catch/CatchProjs
+ // The call is either a Java call or the VM's rethrow stub
+ void catch_call_exceptions(ciExceptionHandlerStream&);
+
+ // Handle all exceptions thrown by the inlined method.
+ // Also handles exceptions for individual bytecodes.
+ void catch_inline_exceptions(SafePointNode* ex_map);
+
+ // Bytecode classifier, helps decide to use uncommon_trap vs. rethrow_C.
+ bool can_rerun_bytecode();
+
+ // Merge the given map into correct exceptional exit state.
+ // Assumes that there is no applicable local handler.
+ void throw_to_exit(SafePointNode* ex_map);
+
+ public:
+#ifndef PRODUCT
+ // Handle PrintOpto, etc.
+ void show_parse_info();
+ void dump_map_adr_mem() const;
+ static void print_statistics(); // Print some performance counters
+ void dump();
+ void dump_bci(int bci);
+#endif
+};
diff --git a/src/share/vm/opto/parse1.cpp b/src/share/vm/opto/parse1.cpp
new file mode 100644
index 000000000..4f8e93162
--- /dev/null
+++ b/src/share/vm/opto/parse1.cpp
@@ -0,0 +1,2166 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse1.cpp.incl"
+
+// Static array so we can figure out which bytecodes stop us from compiling
+// the most. Some of the non-static variables are needed in bytecodeInfo.cpp
+// and eventually should be encapsulated in a proper class (gri 8/18/98).
+
+int nodes_created = 0; int nodes_created_old = 0;
+int methods_parsed = 0; int methods_parsed_old = 0;
+int methods_seen = 0; int methods_seen_old = 0;
+
+int explicit_null_checks_inserted = 0, explicit_null_checks_inserted_old = 0;
+int explicit_null_checks_elided = 0, explicit_null_checks_elided_old = 0;
+int all_null_checks_found = 0, implicit_null_checks = 0;
+int implicit_null_throws = 0;
+
+int parse_idx = 0;
+size_t parse_arena = 0;
+int reclaim_idx = 0;
+int reclaim_in = 0;
+int reclaim_node = 0;
+
+#ifndef PRODUCT
+bool Parse::BytecodeParseHistogram::_initialized = false;
+uint Parse::BytecodeParseHistogram::_bytecodes_parsed [Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_nodes_constructed[Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_nodes_transformed[Bytecodes::number_of_codes];
+uint Parse::BytecodeParseHistogram::_new_values [Bytecodes::number_of_codes];
+#endif
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void Parse::print_statistics() {
+ tty->print_cr("--- Compiler Statistics ---");
+ tty->print("Methods seen: %d Methods parsed: %d", methods_seen, methods_parsed);
+ tty->print(" Nodes created: %d", nodes_created);
+ tty->cr();
+ if (methods_seen != methods_parsed)
+ tty->print_cr("Reasons for parse failures (NOT cumulative):");
+
+ if( explicit_null_checks_inserted )
+ tty->print_cr("%d original NULL checks - %d elided (%2d%%); optimizer leaves %d,", explicit_null_checks_inserted, explicit_null_checks_elided, (100*explicit_null_checks_elided)/explicit_null_checks_inserted, all_null_checks_found);
+ if( all_null_checks_found )
+ tty->print_cr("%d made implicit (%2d%%)", implicit_null_checks,
+ (100*implicit_null_checks)/all_null_checks_found);
+ if( implicit_null_throws )
+ tty->print_cr("%d implicit null exceptions at runtime",
+ implicit_null_throws);
+
+ if( PrintParseStatistics && BytecodeParseHistogram::initialized() ) {
+ BytecodeParseHistogram::print();
+ }
+}
+#endif
+
+//------------------------------ON STACK REPLACEMENT---------------------------
+
+// Construct a node which can be used to get incoming state for
+// on stack replacement.
+Node *Parse::fetch_interpreter_state(int index,
+ BasicType bt,
+ Node *local_addrs,
+ Node *local_addrs_base) {
+ Node *mem = memory(Compile::AliasIdxRaw);
+ Node *adr = basic_plus_adr( local_addrs_base, local_addrs, -index*wordSize );
+
+ // Very similar to LoadNode::make, except we handle un-aligned longs and
+ // doubles on Sparc. Intel can handle them just fine directly.
+ Node *l;
+ switch( bt ) { // Signature is flattened
+ case T_INT: l = new (C, 3) LoadINode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
+ case T_FLOAT: l = new (C, 3) LoadFNode( 0, mem, adr, TypeRawPtr::BOTTOM ); break;
+ case T_ADDRESS:
+ case T_OBJECT: l = new (C, 3) LoadPNode( 0, mem, adr, TypeRawPtr::BOTTOM, TypeInstPtr::BOTTOM ); break;
+ case T_LONG:
+ case T_DOUBLE: {
+ // Since arguments are in reverse order, the argument address 'adr'
+ // refers to the back half of the long/double. Recompute adr.
+ adr = basic_plus_adr( local_addrs_base, local_addrs, -(index+1)*wordSize );
+ if( Matcher::misaligned_doubles_ok ) {
+ l = (bt == T_DOUBLE)
+ ? (Node*)new (C, 3) LoadDNode( 0, mem, adr, TypeRawPtr::BOTTOM )
+ : (Node*)new (C, 3) LoadLNode( 0, mem, adr, TypeRawPtr::BOTTOM );
+ } else {
+ l = (bt == T_DOUBLE)
+ ? (Node*)new (C, 3) LoadD_unalignedNode( 0, mem, adr, TypeRawPtr::BOTTOM )
+ : (Node*)new (C, 3) LoadL_unalignedNode( 0, mem, adr, TypeRawPtr::BOTTOM );
+ }
+ break;
+ }
+ default: ShouldNotReachHere();
+ }
+ return _gvn.transform(l);
+}
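
The offset arithmetic above is easy to get backwards: locals sit in the OSR buffer in reverse order, so slot `index` lives at byte offset (max_locals - 1 - index) * wordSize from the buffer base, and a two-slot long/double at slots (index, index + 1) is loaded from slot index + 1's offset. A small sketch of those offsets (the word size and local count are illustrative assumptions, not values taken from the VM):

#include <cstdio>

int main() {
  const int wordSize   = 8;   // assumed 64-bit word, for illustration only
  const int max_locals = 4;
  // locals_addr, as set up in load_interpreter_state() below, points at slot 0,
  // i.e. at buffer + (max_locals - 1) * wordSize; fetch_interpreter_state()
  // then subtracts index * wordSize (or (index + 1) * wordSize for long/double).
  for (int index = 0; index < max_locals; index++) {
    std::printf("local %d -> buffer offset +%d\n",
                index, (max_locals - 1 - index) * wordSize);
  }
  // A long or double occupying slots (1, 2) is loaded from slot 2's offset:
  std::printf("two-slot value at (1,2) -> buffer offset +%d\n",
              (max_locals - 1 - 2) * wordSize);
  return 0;
}
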
+
+// Helper routine to prevent the interpreter from handing
+// unexpected typestate to an OSR method.
+// The Node l is a value newly dug out of the interpreter frame.
+// The type is the type predicted by ciTypeFlow. Note that it is
+// not a general type, but can only come from Type::get_typeflow_type.
+// The safepoint is a map which will feed an uncommon trap.
+Node* Parse::check_interpreter_type(Node* l, const Type* type,
+ SafePointNode* &bad_type_exit) {
+
+ const TypeOopPtr* tp = type->isa_oopptr();
+
+ // TypeFlow may assert null-ness if a type appears unloaded.
+ if (type == TypePtr::NULL_PTR ||
+ (tp != NULL && !tp->klass()->is_loaded())) {
+ // Value must be null, not a real oop.
+ Node* chk = _gvn.transform( new (C, 3) CmpPNode(l, null()) );
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(chk, BoolTest::eq) );
+ IfNode* iff = create_and_map_if(control(), tst, PROB_MAX, COUNT_UNKNOWN);
+ set_control(_gvn.transform( new (C, 1) IfTrueNode(iff) ));
+ Node* bad_type = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ bad_type_exit->control()->add_req(bad_type);
+ l = null();
+ }
+
+ // Typeflow can also cut off paths from the CFG, based on
+ // types which appear unloaded, or call sites which appear unlinked.
+ // When paths are cut off, values at later merge points can rise
+ // toward more specific classes. Make sure these specific classes
+ // are still in effect.
+ if (tp != NULL && tp->klass() != C->env()->Object_klass()) {
+ // TypeFlow asserted a specific object type. Value must have that type.
+ Node* bad_type_ctrl = NULL;
+ l = gen_checkcast(l, makecon(TypeKlassPtr::make(tp->klass())), &bad_type_ctrl);
+ bad_type_exit->control()->add_req(bad_type_ctrl);
+ }
+
+ BasicType bt_l = _gvn.type(l)->basic_type();
+ BasicType bt_t = type->basic_type();
+ assert(_gvn.type(l)->higher_equal(type), "must constrain OSR typestate");
+ return l;
+}
+
+// Helper routine which sets up elements of the initial parser map when
+// performing a parse for on stack replacement. Add values into map.
+ // The only parameter contains the address of the interpreter arguments (the OSR buffer).
+void Parse::load_interpreter_state(Node* osr_buf) {
+ int index;
+ int max_locals = jvms()->loc_size();
+ int max_stack = jvms()->stk_size();
+
+
+ // Mismatch between method and jvms can occur since map briefly held
+ // an OSR entry state (which takes up one RawPtr word).
+ assert(max_locals == method()->max_locals(), "sanity");
+ assert(max_stack >= method()->max_stack(), "sanity");
+ assert((int)jvms()->endoff() == TypeFunc::Parms + max_locals + max_stack, "sanity");
+ assert((int)jvms()->endoff() == (int)map()->req(), "sanity");
+
+ // Find the start block.
+ Block* osr_block = start_block();
+ assert(osr_block->start() == osr_bci(), "sanity");
+
+ // Set initial BCI.
+ set_parse_bci(osr_block->start());
+
+ // Set initial stack depth.
+ set_sp(osr_block->start_sp());
+
+ // Check bailouts. We currently do not perform on stack replacement
+ // of loops in catch blocks or loops which branch with a non-empty stack.
+ if (sp() != 0) {
+ C->record_method_not_compilable("OSR starts with non-empty stack");
+ return;
+ }
+ // Do not OSR inside finally clauses:
+ if (osr_block->has_trap_at(osr_block->start())) {
+ C->record_method_not_compilable("OSR starts with an immediate trap");
+ return;
+ }
+
+ // Commute monitors from interpreter frame to compiler frame.
+ assert(jvms()->monitor_depth() == 0, "should be no active locks at beginning of osr");
+ int mcnt = osr_block->flow()->monitor_count();
+ Node *monitors_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals+mcnt*2-1)*wordSize);
+ for (index = 0; index < mcnt; index++) {
+ // Make a BoxLockNode for the monitor.
+ Node *box = _gvn.transform(new (C, 1) BoxLockNode(next_monitor()));
+
+
+ // Displaced headers and locked objects are interleaved in the
+ // temp OSR buffer. We only copy the locked objects out here.
+ // Fetch the locked object from the OSR temp buffer and copy to our fastlock node.
+ Node *lock_object = fetch_interpreter_state(index*2, T_OBJECT, monitors_addr, osr_buf);
+ // Try and copy the displaced header to the BoxNode
+ Node *displaced_hdr = fetch_interpreter_state((index*2) + 1, T_ADDRESS, monitors_addr, osr_buf);
+
+
+ store_to_memory(control(), box, displaced_hdr, T_ADDRESS, Compile::AliasIdxRaw);
+
+ // Build a bogus FastLockNode (no code will be generated) and push the
+ // monitor into our debug info.
+ const FastLockNode *flock = _gvn.transform(new (C, 3) FastLockNode( 0, lock_object, box ))->as_FastLock();
+ map()->push_monitor(flock);
+
+ // If the lock is our method synchronization lock, tuck it away in
+ // _sync_lock for return and rethrow exit paths.
+ if (index == 0 && method()->is_synchronized()) {
+ _synch_lock = flock;
+ }
+ }
+
+ MethodLivenessResult live_locals = method()->liveness_at_bci(osr_bci());
+ if (!live_locals.is_valid()) {
+ // Degenerate or breakpointed method.
+ C->record_method_not_compilable("OSR in empty or breakpointed method");
+ return;
+ }
+
+ // Extract the needed locals from the interpreter frame.
+ Node *locals_addr = basic_plus_adr(osr_buf, osr_buf, (max_locals-1)*wordSize);
+
+ // find all the locals that the interpreter thinks contain live oops
+ const BitMap live_oops = method()->live_local_oops_at_bci(osr_bci());
+ for (index = 0; index < max_locals; index++) {
+
+ if (!live_locals.at(index)) {
+ continue;
+ }
+
+ const Type *type = osr_block->local_type_at(index);
+
+ if (type->isa_oopptr() != NULL) {
+
+ // 6403625: Verify that the interpreter oopMap thinks that the oop is live
+ // else we might load a stale oop if the MethodLiveness disagrees with the
+ // result of the interpreter. If the interpreter says it is dead we agree
+ // by making the value go to null.
+ //
+
+ if (!live_oops.at(index)) {
+ if (C->log() != NULL) {
+ C->log()->elem("OSR_mismatch local_index='%d'",index);
+ }
+ set_local(index, null());
+ // and ignore it for the loads
+ continue;
+ }
+ }
+
+ // Filter out TOP, HALF, and BOTTOM. (Cf. ensure_phi.)
+ if (type == Type::TOP || type == Type::HALF) {
+ continue;
+ }
+ // If the type falls to bottom, then this must be a local that
+ // is mixing ints and oops or some such. Forcing it to top
+ // makes it go dead.
+ if (type == Type::BOTTOM) {
+ continue;
+ }
+ // Construct code to access the appropriate local.
+ Node *value = fetch_interpreter_state(index, type->basic_type(), locals_addr, osr_buf);
+ set_local(index, value);
+ }
+
+ // Extract the needed stack entries from the interpreter frame.
+ for (index = 0; index < sp(); index++) {
+ const Type *type = osr_block->stack_type_at(index);
+ if (type != Type::TOP) {
+ // Currently the compiler bails out when attempting to on stack replace
+ // at a bci with a non-empty stack. We should not reach here.
+ ShouldNotReachHere();
+ }
+ }
+
+ // End the OSR migration
+ make_runtime_call(RC_LEAF, OptoRuntime::osr_end_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::OSR_migration_end),
+ "OSR_migration_end", TypeRawPtr::BOTTOM,
+ osr_buf);
+
+ // Now that the interpreter state is loaded, make sure it will match
+ // at execution time what the compiler is expecting now:
+ SafePointNode* bad_type_exit = clone_map();
+ bad_type_exit->set_control(new (C, 1) RegionNode(1));
+
+ for (index = 0; index < max_locals; index++) {
+ if (stopped()) break;
+ Node* l = local(index);
+ if (l->is_top()) continue; // nothing here
+ const Type *type = osr_block->local_type_at(index);
+ if (type->isa_oopptr() != NULL) {
+ if (!live_oops.at(index)) {
+ // skip type check for dead oops
+ continue;
+ }
+ }
+ set_local(index, check_interpreter_type(l, type, bad_type_exit));
+ }
+
+ for (index = 0; index < sp(); index++) {
+ if (stopped()) break;
+ Node* l = stack(index);
+ if (l->is_top()) continue; // nothing here
+ const Type *type = osr_block->stack_type_at(index);
+ set_stack(index, check_interpreter_type(l, type, bad_type_exit));
+ }
+
+ if (bad_type_exit->control()->req() > 1) {
+ // Build an uncommon trap here, if any inputs can be unexpected.
+ bad_type_exit->set_control(_gvn.transform( bad_type_exit->control() ));
+ record_for_igvn(bad_type_exit->control());
+ SafePointNode* types_are_good = map();
+ set_map(bad_type_exit);
+ // The unexpected type happens because a new edge is active
+ // in the CFG, which typeflow had previously ignored.
+ // E.g., Object x = coldAtFirst() && notReached()? "str": new Integer(123).
+ // This x will be typed as Integer if notReached is not yet linked.
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret);
+ set_map(types_are_good);
+ }
+}
+
+//------------------------------Parse------------------------------------------
+// Main parser constructor.
+Parse::Parse(JVMState* caller, ciMethod* parse_method, float expected_uses)
+ : _exits(caller)
+{
+ // Init some variables
+ _caller = caller;
+ _method = parse_method;
+ _expected_uses = expected_uses;
+ _depth = 1 + (caller->has_method() ? caller->depth() : 0);
+ _wrote_final = false;
+ _entry_bci = InvocationEntryBci;
+ _tf = NULL;
+ _block = NULL;
+ debug_only(_block_count = -1);
+ debug_only(_blocks = (Block*)-1);
+#ifndef PRODUCT
+ if (PrintCompilation || PrintOpto) {
+ // Make sure I have an inline tree, so I can print messages about it.
+ JVMState* ilt_caller = is_osr_parse() ? caller->caller() : caller;
+ InlineTree::find_subtree_from_root(C->ilt(), ilt_caller, parse_method, true);
+ }
+ _max_switch_depth = 0;
+ _est_switch_depth = 0;
+#endif
+
+ _tf = TypeFunc::make(method());
+ _iter.reset_to_method(method());
+ _flow = method()->get_flow_analysis();
+ if (_flow->failing()) {
+ C->record_method_not_compilable_all_tiers(_flow->failure_reason());
+ }
+
+ if (_expected_uses <= 0) {
+ _prof_factor = 1;
+ } else {
+ float prof_total = parse_method->interpreter_invocation_count();
+ if (prof_total <= _expected_uses) {
+ _prof_factor = 1;
+ } else {
+ _prof_factor = _expected_uses / prof_total;
+ }
+ }
+
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ log->begin_head("parse method='%d' uses='%g'",
+ log->identify(parse_method), expected_uses);
+ if (depth() == 1 && C->is_osr_compilation()) {
+ log->print(" osr_bci='%d'", C->entry_bci());
+ }
+ log->stamp();
+ log->end_head();
+ }
+
+ // Accumulate deoptimization counts.
+ // (The range_check and store_check counts are checked elsewhere.)
+ ciMethodData* md = method()->method_data();
+ for (uint reason = 0; reason < md->trap_reason_limit(); reason++) {
+ uint md_count = md->trap_count(reason);
+ if (md_count != 0) {
+ if (md_count == md->trap_count_limit())
+ md_count += md->overflow_trap_count();
+ uint total_count = C->trap_count(reason);
+ uint old_count = total_count;
+ total_count += md_count;
+ // Saturate the add if it overflows.
+ if (total_count < old_count || total_count < md_count)
+ total_count = (uint)-1;
+ C->set_trap_count(reason, total_count);
+ if (log != NULL)
+ log->elem("observe trap='%s' count='%d' total='%d'",
+ Deoptimization::trap_reason_name(reason),
+ md_count, total_count);
+ }
+ }
+ // Accumulate total sum of decompilations, also.
+ C->set_decompile_count(C->decompile_count() + md->decompile_count());
+
+ _count_invocations = C->do_count_invocations();
+ _method_data_update = C->do_method_data_update();
+
+ if (log != NULL && method()->has_exception_handlers()) {
+ log->elem("observe that='has_exception_handlers'");
+ }
+
+ assert(method()->can_be_compiled(), "Can not parse this method, cutout earlier");
+ assert(method()->has_balanced_monitors(), "Can not parse unbalanced monitors, cutout earlier");
+
+ // Always register dependence if JVMTI is enabled, because
+ // either breakpoint setting or hotswapping of methods may
+ // cause deoptimization.
+ if (JvmtiExport::can_hotswap_or_post_breakpoint()) {
+ C->dependencies()->assert_evol_method(method());
+ }
+
+ methods_seen++;
+
+ // Do some special top-level things.
+ if (depth() == 1 && C->is_osr_compilation()) {
+ _entry_bci = C->entry_bci();
+ _flow = method()->get_osr_flow_analysis(osr_bci());
+ if (_flow->failing()) {
+ C->record_method_not_compilable(_flow->failure_reason());
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ tty->print_cr("OSR @%d type flow bailout: %s", _entry_bci, _flow->failure_reason());
+ if (Verbose) {
+ method()->print_oop();
+ method()->print_codes();
+ _flow->print();
+ }
+ }
+#endif
+ }
+ _tf = C->tf(); // the OSR entry type is different
+ }
+
+#ifdef ASSERT
+ if (depth() == 1) {
+ assert(C->is_osr_compilation() == this->is_osr_parse(), "OSR in sync");
+ if (C->tf() != tf()) {
+ MutexLockerEx ml(Compile_lock, Mutex::_no_safepoint_check_flag);
+ assert(C->env()->system_dictionary_modification_counter_changed(),
+ "Must invalidate if TypeFuncs differ");
+ }
+ } else {
+ assert(!this->is_osr_parse(), "no recursive OSR");
+ }
+#endif
+
+ methods_parsed++;
+#ifndef PRODUCT
+ // add method size here to guarantee that inlined methods are added too
+ if (TimeCompiler)
+ _total_bytes_compiled += method()->code_size();
+
+ show_parse_info();
+#endif
+
+ if (failing()) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ gvn().set_type(root(), root()->bottom_type());
+ gvn().transform(top());
+
+ // Import the results of the ciTypeFlow.
+ init_blocks();
+
+ // Merge point for all normal exits
+ build_exits();
+
+ // Setup the initial JVM state map.
+ SafePointNode* entry_map = create_entry_map();
+
+ // Check for bailouts during map initialization
+ if (failing() || entry_map == NULL) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ Node_Notes* caller_nn = C->default_node_notes();
+ // Collect debug info for inlined calls unless -XX:-DebugInlinedCalls.
+ if (DebugInlinedCalls || depth() == 1) {
+ C->set_default_node_notes(make_node_notes(caller_nn));
+ }
+
+ if (is_osr_parse()) {
+ Node* osr_buf = entry_map->in(TypeFunc::Parms+0);
+ entry_map->set_req(TypeFunc::Parms+0, top());
+ set_map(entry_map);
+ load_interpreter_state(osr_buf);
+ } else {
+ set_map(entry_map);
+ do_method_entry();
+ }
+
+ // Check for bailouts during method entry.
+ if (failing()) {
+ if (log) log->done("parse");
+ C->set_default_node_notes(caller_nn);
+ return;
+ }
+
+ entry_map = map(); // capture any changes performed by method setup code
+ assert(jvms()->endoff() == map()->req(), "map matches JVMS layout");
+
+ // We begin parsing as if we have just encountered a jump to the
+ // method entry.
+ Block* entry_block = start_block();
+ assert(entry_block->start() == (is_osr_parse() ? osr_bci() : 0), "");
+ set_map_clone(entry_map);
+ merge_common(entry_block, entry_block->next_path_num());
+
+#ifndef PRODUCT
+ BytecodeParseHistogram *parse_histogram_obj = new (C->env()->arena()) BytecodeParseHistogram(this, C);
+ set_parse_histogram( parse_histogram_obj );
+#endif
+
+ // Parse all the basic blocks.
+ do_all_blocks();
+
+ C->set_default_node_notes(caller_nn);
+
+ // Check for bailouts during conversion to graph
+ if (failing()) {
+ if (log) log->done("parse");
+ return;
+ }
+
+ // Fix up all exiting control flow.
+ set_map(entry_map);
+ do_exits();
+
+ // Collect a few more statistics.
+ parse_idx += C->unique();
+ parse_arena += C->node_arena()->used();
+
+ if (log) log->done("parse nodes='%d' memory='%d'",
+ C->unique(), C->node_arena()->used());
+}
+
+//---------------------------do_all_blocks-------------------------------------
+void Parse::do_all_blocks() {
+ _blocks_merged = 0;
+ _blocks_parsed = 0;
+
+ int old_blocks_merged = -1;
+ int old_blocks_parsed = -1;
+
+ for (int tries = 0; ; tries++) {
+ visit_blocks();
+ if (failing()) return; // Check for bailout
+
+ // No need for a work list. The outer loop is hardly ever repeated.
+ // The following loop traverses the blocks in a reasonable pre-order,
+ // as produced by the ciTypeFlow pass.
+
+ // This loop can be taken more than once if there are two entries to
+ // a loop (irreducible CFG), and the edge which ciTypeFlow chose
+ // as the first predecessor to the loop goes dead in the parser,
+ // due to parse-time optimization. (Could happen with obfuscated code.)
+
+ // Look for progress, or the lack of it:
+ if (_blocks_parsed == block_count()) {
+ // That's all, folks.
+ if (TraceOptoParse) {
+ tty->print_cr("All blocks parsed.");
+ }
+ break;
+ }
+
+ // How much work was done this time around?
+ int new_blocks_merged = _blocks_merged - old_blocks_merged;
+ int new_blocks_parsed = _blocks_parsed - old_blocks_parsed;
+ if (new_blocks_merged == 0) {
+ if (TraceOptoParse) {
+ tty->print_cr("All live blocks parsed; %d dead blocks.", block_count() - _blocks_parsed);
+ }
+ // No new blocks have become parseable. Some blocks are just dead.
+ break;
+ }
+ assert(new_blocks_parsed > 0, "must make progress");
+ assert(tries < block_count(), "the pre-order cannot be this bad!");
+
+ old_blocks_merged = _blocks_merged;
+ old_blocks_parsed = _blocks_parsed;
+ }
+
+#ifndef PRODUCT
+ // Make sure there are no half-processed blocks remaining.
+ // Every remaining unprocessed block is dead and may be ignored now.
+ for (int po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ if (!block->is_parsed()) {
+ if (TraceOptoParse) {
+ tty->print("Skipped dead block %d at bci:%d", po, block->start());
+ assert(!block->is_merged(), "no half-processed blocks");
+ }
+ }
+ }
+#endif
+}
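
do_all_blocks() is a small fixed-point loop: it repeats whole passes over the blocks until either everything is parsed or a pass merges no new state, in which case the remaining blocks are dead. A standalone sketch of that shape (plain C++; the tiny CFG, the merged/parsed flags and the counters are illustrative stand-ins for the parser's real state):

#include <iostream>
#include <vector>

int main() {
  // succ[b] lists the successors of block b; block 0 is the entry.
  // Blocks 4 and 5 form an unreachable cycle, i.e. dead code.
  std::vector<std::vector<int>> succ = {{1, 2}, {3}, {3}, {}, {5}, {4}};
  std::vector<bool> merged(succ.size(), false), parsed(succ.size(), false);
  merged[0] = true;

  int blocks_parsed = 0, blocks_merged = 1;
  for (;;) {
    int old_blocks_merged = blocks_merged;
    for (int b = 0; b < static_cast<int>(succ.size()); ++b) {  // one pre-order pass
      if (parsed[b] || !merged[b]) continue;          // done, or not reached yet
      parsed[b] = true; ++blocks_parsed;
      for (int s : succ[b])
        if (!merged[s]) { merged[s] = true; ++blocks_merged; }
    }
    if (blocks_parsed == static_cast<int>(succ.size())) break;  // all parsed
    if (blocks_merged == old_blocks_merged) break;    // no progress: rest is dead
  }
  std::cout << "parsed " << blocks_parsed << " of " << succ.size()
            << " blocks\n";                           // prints "parsed 4 of 6 blocks"
  return 0;
}
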
+
+//---------------------------visit_blocks--------------------------------------
+void Parse::visit_blocks() {
+ // Walk over all blocks, parsing every one that has been reached (merged).
+ for (int po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+
+ if (block->is_parsed()) {
+ // Do not parse twice.
+ continue;
+ }
+
+ if (!block->is_merged()) {
+ // No state on this block. It had not yet been reached.
+ // Delay reaching it until later.
+ continue;
+ }
+
+ // Prepare to parse this block.
+ load_state_from(block);
+
+ if (stopped()) {
+ // Block is dead.
+ continue;
+ }
+
+ if (!block->is_ready() || block->is_handler()) {
+ // Not all preds have been parsed. We must build phis everywhere.
+ // (Note that dead locals do not get phis built, ever.)
+ ensure_phis_everywhere();
+
+ // Leave behind an undisturbed copy of the map, for future merges.
+ set_map(clone_map());
+ }
+
+ // Ready or not, parse the block.
+ do_one_block();
+
+ // Check for bailouts.
+ if (failing()) return;
+ }
+}
+
+//-------------------------------build_exits----------------------------------
+// Build normal and exceptional exit merge points.
+void Parse::build_exits() {
+ // make a clone of caller to prevent sharing of side-effects
+ _exits.set_map(_exits.clone_map());
+ _exits.clean_stack(_exits.sp());
+ _exits.sync_jvms();
+
+ RegionNode* region = new (C, 1) RegionNode(1);
+ record_for_igvn(region);
+ gvn().set_type_bottom(region);
+ _exits.set_control(region);
+
+ // Note: iophi and memphi are not transformed until do_exits.
+ Node* iophi = new (C, region->req()) PhiNode(region, Type::ABIO);
+ Node* memphi = new (C, region->req()) PhiNode(region, Type::MEMORY, TypePtr::BOTTOM);
+ _exits.set_i_o(iophi);
+ _exits.set_all_memory(memphi);
+
+ // Add a return value to the exit state. (Do not push it yet.)
+ if (tf()->range()->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+ // Don't "bind" an unloaded return klass to the ret_phi. If the klass
+ // becomes loaded during the subsequent parsing, the loaded and unloaded
+ // types will not join when we transform and push in do_exits().
+ const TypeOopPtr* ret_oop_type = ret_type->isa_oopptr();
+ if (ret_oop_type && !ret_oop_type->klass()->is_loaded()) {
+ ret_type = TypeOopPtr::BOTTOM;
+ }
+ int ret_size = type2size[ret_type->basic_type()];
+ Node* ret_phi = new (C, region->req()) PhiNode(region, ret_type);
+ _exits.ensure_stack(ret_size);
+ assert((int)(tf()->range()->cnt() - TypeFunc::Parms) == ret_size, "good tf range");
+ assert(method()->return_type()->size() == ret_size, "tf agrees w/ method");
+ _exits.set_argument(0, ret_phi); // here is where the parser finds it
+ // Note: ret_phi is not yet pushed, until do_exits.
+ }
+}
+
+
+//----------------------------build_start_state-------------------------------
+// Construct a state which contains only the incoming arguments from an
+// unknown caller. The method & bci will be NULL & InvocationEntryBci.
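+// For example, for a two-argument static method (int,int)->int compiled
+// normally, the start map gets ParmNode projections of the StartNode for the
+// TypeFunc::Parms machine inputs (control, I/O, memory, frame pointer,
+// return address), followed by one ParmNode per int argument; any remaining
+// slots up to max_size are filled with top().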
+JVMState* Compile::build_start_state(StartNode* start, const TypeFunc* tf) {
+ int arg_size = tf->domain()->cnt();
+ int max_size = MAX2(arg_size, (int)tf->range()->cnt());
+ JVMState* jvms = new (this) JVMState(max_size - TypeFunc::Parms);
+ SafePointNode* map = new (this, max_size) SafePointNode(max_size, NULL);
+ record_for_igvn(map);
+ assert(arg_size == TypeFunc::Parms + (is_osr_compilation() ? 1 : method()->arg_size()), "correct arg_size");
+ Node_Notes* old_nn = default_node_notes();
+ if (old_nn != NULL && has_method()) {
+ Node_Notes* entry_nn = old_nn->clone(this);
+ JVMState* entry_jvms = new(this) JVMState(method(), old_nn->jvms());
+ entry_jvms->set_offsets(0);
+ entry_jvms->set_bci(entry_bci());
+ entry_nn->set_jvms(entry_jvms);
+ set_default_node_notes(entry_nn);
+ }
+ uint i;
+ for (i = 0; i < (uint)arg_size; i++) {
+ Node* parm = initial_gvn()->transform(new (this, 1) ParmNode(start, i));
+ map->init_req(i, parm);
+ // Record all these guys for later GVN.
+ record_for_igvn(parm);
+ }
+ for (; i < map->req(); i++) {
+ map->init_req(i, top());
+ }
+ assert(jvms->argoff() == TypeFunc::Parms, "parser gets arguments here");
+ set_default_node_notes(old_nn);
+ map->set_jvms(jvms);
+ jvms->set_map(map);
+ return jvms;
+}
+
+//-----------------------------make_node_notes---------------------------------
+Node_Notes* Parse::make_node_notes(Node_Notes* caller_nn) {
+ if (caller_nn == NULL) return NULL;
+ Node_Notes* nn = caller_nn->clone(C);
+ JVMState* caller_jvms = nn->jvms();
+ JVMState* jvms = new (C) JVMState(method(), caller_jvms);
+ jvms->set_offsets(0);
+ jvms->set_bci(_entry_bci);
+ nn->set_jvms(jvms);
+ return nn;
+}
+
+
+//--------------------------return_values--------------------------------------
+void Compile::return_values(JVMState* jvms) {
+ GraphKit kit(jvms);
+ Node* ret = new (this, TypeFunc::Parms) ReturnNode(TypeFunc::Parms,
+ kit.control(),
+ kit.i_o(),
+ kit.reset_memory(),
+ kit.frameptr(),
+ kit.returnadr());
+ // Add zero or 1 return values
+ int ret_size = tf()->range()->cnt() - TypeFunc::Parms;
+ if (ret_size > 0) {
+ kit.inc_sp(-ret_size); // pop the return value(s)
+ kit.sync_jvms();
+ ret->add_req(kit.argument(0));
+ // Note: The second dummy edge is not needed by a ReturnNode.
+ }
+ // bind it to root
+ root()->add_req(ret);
+ record_for_igvn(ret);
+ initial_gvn()->transform_no_reclaim(ret);
+}
+
+//------------------------rethrow_exceptions-----------------------------------
+// Bind all exception states in the list into a single RethrowNode.
+void Compile::rethrow_exceptions(JVMState* jvms) {
+ GraphKit kit(jvms);
+ if (!kit.has_exceptions()) return; // nothing to generate
+ // Load my combined exception state into the kit, with all phis transformed:
+ SafePointNode* ex_map = kit.combine_and_pop_all_exception_states();
+ Node* ex_oop = kit.use_exception_state(ex_map);
+ RethrowNode* exit = new (this, TypeFunc::Parms + 1) RethrowNode(kit.control(),
+ kit.i_o(), kit.reset_memory(),
+ kit.frameptr(), kit.returnadr(),
+ // like a return but with exception input
+ ex_oop);
+ // bind to root
+ root()->add_req(exit);
+ record_for_igvn(exit);
+ initial_gvn()->transform_no_reclaim(exit);
+}
+
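+//--------------------------can_rerun_bytecode---------------------------------
+// Debug-only classification (used via debug_only in do_exceptions): roughly,
+// a bytecode can be "re-run" if its exception checks come before any of its
+// side effects; the invoke bytecodes cannot, and any other bytecode is not
+// expected to produce an exception here at all.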
+bool Parse::can_rerun_bytecode() {
+ switch (bc()) {
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ case Bytecodes::_getfield:
+ case Bytecodes::_putfield:
+ case Bytecodes::_getstatic:
+ case Bytecodes::_putstatic:
+ case Bytecodes::_arraylength:
+ case Bytecodes::_baload:
+ case Bytecodes::_caload:
+ case Bytecodes::_iaload:
+ case Bytecodes::_saload:
+ case Bytecodes::_faload:
+ case Bytecodes::_aaload:
+ case Bytecodes::_laload:
+ case Bytecodes::_daload:
+ case Bytecodes::_bastore:
+ case Bytecodes::_castore:
+ case Bytecodes::_iastore:
+ case Bytecodes::_sastore:
+ case Bytecodes::_fastore:
+ case Bytecodes::_aastore:
+ case Bytecodes::_lastore:
+ case Bytecodes::_dastore:
+ case Bytecodes::_irem:
+ case Bytecodes::_idiv:
+ case Bytecodes::_lrem:
+ case Bytecodes::_ldiv:
+ case Bytecodes::_frem:
+ case Bytecodes::_fdiv:
+ case Bytecodes::_drem:
+ case Bytecodes::_ddiv:
+ case Bytecodes::_checkcast:
+ case Bytecodes::_instanceof:
+ case Bytecodes::_athrow:
+ case Bytecodes::_anewarray:
+ case Bytecodes::_newarray:
+ case Bytecodes::_multianewarray:
+ case Bytecodes::_new:
+ case Bytecodes::_monitorenter: // can re-run initial null check, only
+ case Bytecodes::_return:
+    return true;
+
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+    return false;
+
+ default:
+ assert(false, "unexpected bytecode produced an exception");
+ return true;
+ }
+}
+
+//---------------------------do_exceptions-------------------------------------
+// Process exceptions arising from the current bytecode.
+// Send caught exceptions to the proper handler within this method.
+// Unhandled exceptions feed into _exit.
+void Parse::do_exceptions() {
+ if (!has_exceptions()) return;
+
+ if (failing()) {
+ // Pop them all off and throw them away.
+ while (pop_exception_state() != NULL) ;
+ return;
+ }
+
+ // Make sure we can classify this bytecode if we need to.
+ debug_only(can_rerun_bytecode());
+
+ PreserveJVMState pjvms(this, false);
+
+ SafePointNode* ex_map;
+ while ((ex_map = pop_exception_state()) != NULL) {
+ if (!method()->has_exception_handlers()) {
+ // Common case: Transfer control outward.
+ // Doing it this early allows the exceptions to common up
+ // even between adjacent method calls.
+ throw_to_exit(ex_map);
+ } else {
+ // Have to look at the exception first.
+ assert(stopped(), "catch_inline_exceptions trashes the map");
+ catch_inline_exceptions(ex_map);
+ stop_and_kill_map(); // we used up this exception state; kill it
+ }
+ }
+
+ // We now return to our regularly scheduled program:
+}
+
+//---------------------------throw_to_exit-------------------------------------
+// Merge the given map into an exception exit from this method.
+// The exception exit will handle any unlocking of receiver.
+// The ex_oop must be saved within the ex_map, unlike merge_exception.
+void Parse::throw_to_exit(SafePointNode* ex_map) {
+ // Pop the JVMS to (a copy of) the caller.
+ GraphKit caller;
+ caller.set_map_clone(_caller->map());
+ caller.set_bci(_caller->bci());
+ caller.set_sp(_caller->sp());
+ // Copy out the standard machine state:
+ for (uint i = 0; i < TypeFunc::Parms; i++) {
+ caller.map()->set_req(i, ex_map->in(i));
+ }
+ // ...and the exception:
+ Node* ex_oop = saved_ex_oop(ex_map);
+ SafePointNode* caller_ex_map = caller.make_exception_state(ex_oop);
+ // Finally, collect the new exception state in my exits:
+ _exits.add_exception_state(caller_ex_map);
+}
+
+//------------------------------do_exits---------------------------------------
+void Parse::do_exits() {
+ set_parse_bci(InvocationEntryBci);
+
+ // Now peephole on the return bits
+ Node* region = _exits.control();
+ _exits.set_control(gvn().transform(region));
+
+ Node* iophi = _exits.i_o();
+ _exits.set_i_o(gvn().transform(iophi));
+
+ if (wrote_final()) {
+ // This method (which must be a constructor by the rules of Java)
+ // wrote a final. The effects of all initializations must be
+ // committed to memory before any code after the constructor
+  // publishes the reference to the newly constructed object.
+ // Rather than wait for the publication, we simply block the
+ // writes here. Rather than put a barrier on only those writes
+ // which are required to complete, we force all writes to complete.
+ //
+ // "All bets are off" unless the first publication occurs after a
+ // normal return from the constructor. We do not attempt to detect
+ // such unusual early publications. But no barrier is needed on
+ // exceptional returns, since they cannot publish normally.
+ //
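+    // E.g., for  class C { final int x;  C(int v) { x = v; } }  the release
+    // barrier inserted below ensures the store to C.x is complete before the
+    // constructor returns and the new C instance can be published.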
+ _exits.insert_mem_bar(Op_MemBarRelease);
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name();
+ tty->print_cr(" writes finals and needs a memory barrier");
+ }
+#endif
+ }
+
+ for (MergeMemStream mms(_exits.merged_memory()); mms.next_non_empty(); ) {
+ // transform each slice of the original memphi:
+ mms.set_memory(_gvn.transform(mms.memory()));
+ }
+
+ if (tf()->range()->cnt() > TypeFunc::Parms) {
+ const Type* ret_type = tf()->range()->field_at(TypeFunc::Parms);
+ Node* ret_phi = _gvn.transform( _exits.argument(0) );
+ assert(_exits.control()->is_top() || !_gvn.type(ret_phi)->empty(), "return value must be well defined");
+ _exits.push_node(ret_type->basic_type(), ret_phi);
+ }
+
+ // Note: Logic for creating and optimizing the ReturnNode is in Compile.
+
+ // Unlock along the exceptional paths.
+ // This is done late so that we can common up equivalent exceptions
+ // (e.g., null checks) arising from multiple points within this method.
+ // See GraphKit::add_exception_state, which performs the commoning.
+ bool do_synch = method()->is_synchronized() && GenerateSynchronizationCode;
+
+ // record exit from a method if compiled while Dtrace is turned on.
+ if (do_synch || DTraceMethodProbes) {
+ // First move the exception list out of _exits:
+ GraphKit kit(_exits.transfer_exceptions_into_jvms());
+ SafePointNode* normal_map = kit.map(); // keep this guy safe
+ // Now re-collect the exceptions into _exits:
+ SafePointNode* ex_map;
+ while ((ex_map = kit.pop_exception_state()) != NULL) {
+ Node* ex_oop = kit.use_exception_state(ex_map);
+ // Force the exiting JVM state to have this method at InvocationEntryBci.
+ // The exiting JVM state is otherwise a copy of the calling JVMS.
+ JVMState* caller = kit.jvms();
+ JVMState* ex_jvms = caller->clone_shallow(C);
+ ex_jvms->set_map(kit.clone_map());
+ ex_jvms->map()->set_jvms(ex_jvms);
+ ex_jvms->set_bci( InvocationEntryBci);
+ kit.set_jvms(ex_jvms);
+ if (do_synch) {
+ // Add on the synchronized-method box/object combo
+ kit.map()->push_monitor(_synch_lock);
+ // Unlock!
+ kit.shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
+ }
+ if (DTraceMethodProbes) {
+ kit.make_dtrace_method_exit(method());
+ }
+ // Done with exception-path processing.
+ ex_map = kit.make_exception_state(ex_oop);
+ assert(ex_jvms->same_calls_as(ex_map->jvms()), "sanity");
+ // Pop the last vestige of this method:
+ ex_map->set_jvms(caller->clone_shallow(C));
+ ex_map->jvms()->set_map(ex_map);
+ _exits.push_exception_state(ex_map);
+ }
+ assert(_exits.map() == normal_map, "keep the same return state");
+ }
+
+ {
+ // Capture very early exceptions (receiver null checks) from caller JVMS
+ GraphKit caller(_caller);
+ SafePointNode* ex_map;
+ while ((ex_map = caller.pop_exception_state()) != NULL) {
+ _exits.add_exception_state(ex_map);
+ }
+ }
+}
+
+//-----------------------------create_entry_map-------------------------------
+// Initialize our parser map to contain the types at method entry.
+// For OSR, the map contains a single RawPtr parameter.
+// Initial monitor locking for sync. methods is performed by do_method_entry.
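+// For a normal (non-OSR) parse the resulting map has
+//   len = TypeFunc::Parms + max_locals + max_stack
+// inputs: machine state in [0, Parms), locals in [Parms, Parms + max_locals)
+// with the leading ones bound to the incoming arguments, and the expression
+// stack above that; the remaining locals and the stack start out as top().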
+SafePointNode* Parse::create_entry_map() {
+ // Check for really stupid bail-out cases.
+ uint len = TypeFunc::Parms + method()->max_locals() + method()->max_stack();
+ if (len >= 32760) {
+ C->record_method_not_compilable_all_tiers("too many local variables");
+ return NULL;
+ }
+
+ // If this is an inlined method, we may have to do a receiver null check.
+ if (_caller->has_method() && is_normal_parse() && !method()->is_static()) {
+ GraphKit kit(_caller);
+ kit.null_check_receiver(method());
+ _caller = kit.transfer_exceptions_into_jvms();
+ if (kit.stopped()) {
+ _exits.add_exception_states_from(_caller);
+ _exits.set_jvms(_caller);
+ return NULL;
+ }
+ }
+
+ assert(method() != NULL, "parser must have a method");
+
+ // Create an initial safepoint to hold JVM state during parsing
+ JVMState* jvms = new (C) JVMState(method(), _caller->has_method() ? _caller : NULL);
+ set_map(new (C, len) SafePointNode(len, jvms));
+ jvms->set_map(map());
+ record_for_igvn(map());
+ assert(jvms->endoff() == len, "correct jvms sizing");
+
+ SafePointNode* inmap = _caller->map();
+ assert(inmap != NULL, "must have inmap");
+
+ uint i;
+
+ // Pass thru the predefined input parameters.
+ for (i = 0; i < TypeFunc::Parms; i++) {
+ map()->init_req(i, inmap->in(i));
+ }
+
+ if (depth() == 1) {
+ assert(map()->memory()->Opcode() == Op_Parm, "");
+ // Insert the memory aliasing node
+ set_all_memory(reset_memory());
+ }
+ assert(merged_memory(), "");
+
+ // Now add the locals which are initially bound to arguments:
+ uint arg_size = tf()->domain()->cnt();
+ ensure_stack(arg_size - TypeFunc::Parms); // OSR methods have funny args
+ for (i = TypeFunc::Parms; i < arg_size; i++) {
+ map()->init_req(i, inmap->argument(_caller, i - TypeFunc::Parms));
+ }
+
+ // Clear out the rest of the map (locals and stack)
+ for (i = arg_size; i < len; i++) {
+ map()->init_req(i, top());
+ }
+
+ SafePointNode* entry_map = stop();
+ return entry_map;
+}
+
+//-----------------------------do_method_entry--------------------------------
+// Emit any code needed in the pseudo-block before BCI zero.
+// The main thing to do is lock the receiver of a synchronized method.
+void Parse::do_method_entry() {
+ set_parse_bci(InvocationEntryBci); // Pseudo-BCP
+ set_sp(0); // Java Stack Pointer
+
+ NOT_PRODUCT( count_compiled_calls(true/*at_method_entry*/, false/*is_inline*/); )
+
+ if (DTraceMethodProbes) {
+ make_dtrace_method_entry(method());
+ }
+
+ // If the method is synchronized, we need to construct a lock node, attach
+ // it to the Start node, and pin it there.
+ if (method()->is_synchronized()) {
+ // Insert a FastLockNode right after the Start which takes as arguments
+ // the current thread pointer, the "this" pointer & the address of the
+ // stack slot pair used for the lock. The "this" pointer is a projection
+ // off the start node, but the locking spot has to be constructed by
+ // creating a ConLNode of 0, and boxing it with a BoxLockNode. The BoxLockNode
+ // becomes the second argument to the FastLockNode call. The
+ // FastLockNode becomes the new control parent to pin it to the start.
+
+ // Setup Object Pointer
+ Node *lock_obj = NULL;
+ if(method()->is_static()) {
+ ciInstance* mirror = _method->holder()->java_mirror();
+ const TypeInstPtr *t_lock = TypeInstPtr::make(mirror);
+ lock_obj = makecon(t_lock);
+ } else { // Else pass the "this" pointer,
+ lock_obj = local(0); // which is Parm0 from StartNode
+ }
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+ // Build the FastLockNode
+ _synch_lock = shared_lock(lock_obj);
+ }
+
+ if (depth() == 1) {
+ increment_and_test_invocation_counter(Tier2CompileThreshold);
+ }
+}
+
+//------------------------------init_blocks------------------------------------
+// Initialize our parser map to contain the types/monitors at method entry.
+void Parse::init_blocks() {
+ // Create the blocks.
+ _block_count = flow()->block_count();
+ _blocks = NEW_RESOURCE_ARRAY(Block, _block_count);
+ Copy::zero_to_bytes(_blocks, sizeof(Block)*_block_count);
+
+ int po;
+
+ // Initialize the structs.
+ for (po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ block->init_node(this, po);
+ }
+
+ // Collect predecessor and successor information.
+ for (po = 0; po < block_count(); po++) {
+ Block* block = pre_order_at(po);
+ block->init_graph(this);
+ }
+}
+
+//-------------------------------init_node-------------------------------------
+void Parse::Block::init_node(Parse* outer, int po) {
+ _flow = outer->flow()->pre_order_at(po);
+ _pred_count = 0;
+ _preds_parsed = 0;
+ _count = 0;
+ assert(pred_count() == 0 && preds_parsed() == 0, "sanity");
+ assert(!(is_merged() || is_parsed() || is_handler()), "sanity");
+ assert(_live_locals.size() == 0, "sanity");
+
+ // entry point has additional predecessor
+ if (flow()->is_start()) _pred_count++;
+ assert(flow()->is_start() == (this == outer->start_block()), "");
+}
+
+//-------------------------------init_graph------------------------------------
+void Parse::Block::init_graph(Parse* outer) {
+ // Create the successor list for this parser block.
+ GrowableArray<ciTypeFlow::Block*>* tfs = flow()->successors();
+ GrowableArray<ciTypeFlow::Block*>* tfe = flow()->exceptions();
+ int ns = tfs->length();
+ int ne = tfe->length();
+ _num_successors = ns;
+ _all_successors = ns+ne;
+ _successors = (ns+ne == 0) ? NULL : NEW_RESOURCE_ARRAY(Block*, ns+ne);
+ int p = 0;
+ for (int i = 0; i < ns+ne; i++) {
+ ciTypeFlow::Block* tf2 = (i < ns) ? tfs->at(i) : tfe->at(i-ns);
+ Block* block2 = outer->pre_order_at(tf2->pre_order());
+ _successors[i] = block2;
+
+ // Accumulate pred info for the other block, too.
+ if (i < ns) {
+ block2->_pred_count++;
+ } else {
+ block2->_is_handler = true;
+ }
+
+ #ifdef ASSERT
+ // A block's successors must be distinguishable by BCI.
+ // That is, no bytecode is allowed to branch to two different
+ // clones of the same code location.
+ for (int j = 0; j < i; j++) {
+ Block* block1 = _successors[j];
+ if (block1 == block2) continue; // duplicates are OK
+ assert(block1->start() != block2->start(), "successors have unique bcis");
+ }
+ #endif
+ }
+
+  // Note: We never call next_path_num along exception paths, so they
+  // never get processed as "ready".  Also, the input phis of exception
+  // handlers get specially processed, so that phis are built for them
+  // eagerly (see ensure_phis_everywhere) rather than lazily at merges.
+}
+
+//---------------------------successor_for_bci---------------------------------
+Parse::Block* Parse::Block::successor_for_bci(int bci) {
+ for (int i = 0; i < all_successors(); i++) {
+ Block* block2 = successor_at(i);
+ if (block2->start() == bci) return block2;
+ }
+ // We can actually reach here if ciTypeFlow traps out a block
+ // due to an unloaded class, and concurrently with compilation the
+ // class is then loaded, so that a later phase of the parser is
+ // able to see more of the bytecode CFG. Or, the flow pass and
+ // the parser can have a minor difference of opinion about executability
+ // of bytecodes. For example, "obj.field = null" is executable even
+ // if the field's type is an unloaded class; the flow pass used to
+ // make a trap for such code.
+ return NULL;
+}
+
+
+//-----------------------------stack_type_at-----------------------------------
+const Type* Parse::Block::stack_type_at(int i) const {
+ return get_type(flow()->stack_type_at(i));
+}
+
+
+//-----------------------------local_type_at-----------------------------------
+const Type* Parse::Block::local_type_at(int i) const {
+ // Make dead locals fall to bottom.
+ if (_live_locals.size() == 0) {
+ MethodLivenessResult live_locals = flow()->outer()->method()->liveness_at_bci(start());
+ // This bitmap can be zero length if we saw a breakpoint.
+ // In such cases, pretend they are all live.
+ ((Block*)this)->_live_locals = live_locals;
+ }
+ if (_live_locals.size() > 0 && !_live_locals.at(i))
+ return Type::BOTTOM;
+
+ return get_type(flow()->local_type_at(i));
+}
+
+
+#ifndef PRODUCT
+
+//----------------------------name_for_bc--------------------------------------
+// helper method for BytecodeParseHistogram
+static const char* name_for_bc(int i) {
+ return Bytecodes::is_defined(i) ? Bytecodes::name(Bytecodes::cast(i)) : "xxxunusedxxx";
+}
+
+//----------------------------BytecodeParseHistogram------------------------------------
+Parse::BytecodeParseHistogram::BytecodeParseHistogram(Parse *p, Compile *c) {
+ _parser = p;
+ _compiler = c;
+ if( ! _initialized ) { _initialized = true; reset(); }
+}
+
+//----------------------------current_count------------------------------------
+int Parse::BytecodeParseHistogram::current_count(BPHType bph_type) {
+ switch( bph_type ) {
+ case BPH_transforms: { return _parser->gvn().made_progress(); }
+ case BPH_values: { return _parser->gvn().made_new_values(); }
+ default: { ShouldNotReachHere(); return 0; }
+ }
+}
+
+//----------------------------initialized--------------------------------------
+bool Parse::BytecodeParseHistogram::initialized() { return _initialized; }
+
+//----------------------------reset--------------------------------------------
+void Parse::BytecodeParseHistogram::reset() {
+ int i = Bytecodes::number_of_codes;
+ while (i-- > 0) { _bytecodes_parsed[i] = 0; _nodes_constructed[i] = 0; _nodes_transformed[i] = 0; _new_values[i] = 0; }
+}
+
+//----------------------------set_initial_state--------------------------------
+// Record info when starting to parse one bytecode
+void Parse::BytecodeParseHistogram::set_initial_state( Bytecodes::Code bc ) {
+ if( PrintParseStatistics && !_parser->is_osr_parse() ) {
+ _initial_bytecode = bc;
+ _initial_node_count = _compiler->unique();
+ _initial_transforms = current_count(BPH_transforms);
+ _initial_values = current_count(BPH_values);
+ }
+}
+
+//----------------------------record_change--------------------------------
+// Record results of parsing one bytecode
+void Parse::BytecodeParseHistogram::record_change() {
+ if( PrintParseStatistics && !_parser->is_osr_parse() ) {
+ ++_bytecodes_parsed[_initial_bytecode];
+ _nodes_constructed [_initial_bytecode] += (_compiler->unique() - _initial_node_count);
+ _nodes_transformed [_initial_bytecode] += (current_count(BPH_transforms) - _initial_transforms);
+ _new_values [_initial_bytecode] += (current_count(BPH_values) - _initial_values);
+ }
+}
+
+
+//----------------------------print--------------------------------------------
+void Parse::BytecodeParseHistogram::print(float cutoff) {
+ ResourceMark rm;
+ // print profile
+ int total = 0;
+ int i = 0;
+ for( i = 0; i < Bytecodes::number_of_codes; ++i ) { total += _bytecodes_parsed[i]; }
+ int abs_sum = 0;
+ tty->cr(); //0123456789012345678901234567890123456789012345678901234567890123456789
+ tty->print_cr("Histogram of %d parsed bytecodes:", total);
+ if( total == 0 ) { return; }
+ tty->cr();
+ tty->print_cr("absolute: count of compiled bytecodes of this type");
+ tty->print_cr("relative: percentage contribution to compiled nodes");
+ tty->print_cr("nodes : Average number of nodes constructed per bytecode");
+ tty->print_cr("rnodes : Significance towards total nodes constructed, (nodes*relative)");
+ tty->print_cr("transforms: Average amount of tranform progress per bytecode compiled");
+ tty->print_cr("values : Average number of node values improved per bytecode");
+ tty->print_cr("name : Bytecode name");
+ tty->cr();
+ tty->print_cr(" absolute relative nodes rnodes transforms values name");
+ tty->print_cr("----------------------------------------------------------------------");
+ while (--i > 0) {
+ int abs = _bytecodes_parsed[i];
+ float rel = abs * 100.0F / total;
+ float nodes = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _nodes_constructed[i])/_bytecodes_parsed[i];
+ float rnodes = _bytecodes_parsed[i] == 0 ? 0 : rel * nodes;
+ float xforms = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _nodes_transformed[i])/_bytecodes_parsed[i];
+ float values = _bytecodes_parsed[i] == 0 ? 0 : (1.0F * _new_values [i])/_bytecodes_parsed[i];
+ if (cutoff <= rel) {
+ tty->print_cr("%10d %7.2f%% %6.1f %6.2f %6.1f %6.1f %s", abs, rel, nodes, rnodes, xforms, values, name_for_bc(i));
+ abs_sum += abs;
+ }
+ }
+ tty->print_cr("----------------------------------------------------------------------");
+ float rel_sum = abs_sum * 100.0F / total;
+ tty->print_cr("%10d %7.2f%% (cutoff = %.2f%%)", abs_sum, rel_sum, cutoff);
+ tty->print_cr("----------------------------------------------------------------------");
+ tty->cr();
+}
+#endif
+
+//----------------------------load_state_from----------------------------------
+// Load block/map/sp.  But do not touch iter/bci.
+void Parse::load_state_from(Block* block) {
+ set_block(block);
+ // load the block's JVM state:
+ set_map(block->start_map());
+ set_sp( block->start_sp());
+}
+
+
+//-----------------------------record_state------------------------------------
+void Parse::Block::record_state(Parse* p) {
+ assert(!is_merged(), "can only record state once, on 1st inflow");
+ assert(start_sp() == p->sp(), "stack pointer must agree with ciTypeFlow");
+ set_start_map(p->stop());
+}
+
+
+//------------------------------do_one_block-----------------------------------
+void Parse::do_one_block() {
+ if (TraceOptoParse) {
+ Block *b = block();
+ int ns = b->num_successors();
+ int nt = b->all_successors();
+
+ tty->print("Parsing block #%d at bci [%d,%d), successors: ",
+ block()->pre_order(), block()->start(), block()->limit());
+ for (int i = 0; i < nt; i++) {
+ tty->print((( i < ns) ? " %d" : " %d(e)"), b->successor_at(i)->pre_order());
+ }
+ tty->print_cr("");
+ }
+
+ assert(block()->is_merged(), "must be merged before being parsed");
+ block()->mark_parsed();
+ ++_blocks_parsed;
+
+ // Set iterator to start of block.
+ iter().reset_to_bci(block()->start());
+
+ CompileLog* log = C->log();
+
+ // Parse bytecodes
+ while (!stopped() && !failing()) {
+ iter().next();
+
+ // Learn the current bci from the iterator:
+ set_parse_bci(iter().cur_bci());
+
+ if (bci() == block()->limit()) {
+ // Do not walk into the next block until directed by do_all_blocks.
+ merge(bci());
+ break;
+ }
+ assert(bci() < block()->limit(), "bci still in block");
+
+ if (log != NULL) {
+ // Output an optional context marker, to help place actions
+ // that occur during parsing of this BC. If there is no log
+ // output until the next context string, this context string
+ // will be silently ignored.
+ log->context()->reset();
+ log->context()->print_cr("<bc code='%d' bci='%d'/>", (int)bc(), bci());
+ }
+
+ if (block()->has_trap_at(bci())) {
+ // We must respect the flow pass's traps, because it will refuse
+ // to produce successors for trapping blocks.
+ int trap_index = block()->flow()->trap_index();
+ assert(trap_index != 0, "trap index must be valid");
+ uncommon_trap(trap_index);
+ break;
+ }
+
+ NOT_PRODUCT( parse_histogram()->set_initial_state(bc()); );
+
+#ifdef ASSERT
+ int pre_bc_sp = sp();
+ int inputs, depth;
+ bool have_se = !stopped() && compute_stack_effects(inputs, depth);
+ assert(!have_se || pre_bc_sp >= inputs, "have enough stack to execute this BC");
+#endif //ASSERT
+
+ do_one_bytecode();
+
+ assert(!have_se || stopped() || failing() || (sp() - pre_bc_sp) == depth, "correct depth prediction");
+
+ do_exceptions();
+
+ NOT_PRODUCT( parse_histogram()->record_change(); );
+
+ if (log != NULL) log->context()->reset(); // done w/ this one
+
+ // Fall into next bytecode. Each bytecode normally has 1 sequential
+ // successor which is typically made ready by visiting this bytecode.
+ // If the successor has several predecessors, then it is a merge
+ // point, starts a new basic block, and is handled like other basic blocks.
+ }
+}
+
+
+//---------------------------set_parse_bci-------------------------------------
+void Parse::set_parse_bci(int bci) {
+ set_bci(bci);
+ Node_Notes* nn = C->default_node_notes();
+ if (nn == NULL) return;
+
+ // Collect debug info for inlined calls unless -XX:-DebugInlinedCalls.
+ if (!DebugInlinedCalls && depth() > 1) {
+ return;
+ }
+
+ // Update the JVMS annotation, if present.
+ JVMState* jvms = nn->jvms();
+ if (jvms != NULL && jvms->bci() != bci) {
+ // Update the JVMS.
+ jvms = jvms->clone_shallow(C);
+ jvms->set_bci(bci);
+ nn->set_jvms(jvms);
+ }
+}
+
+//------------------------------merge------------------------------------------
+// Merge the current mapping into the basic block starting at bci
+void Parse::merge(int target_bci) {
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(!target->is_ready(), "our arrival must be expected");
+ int pnum = target->next_path_num();
+ merge_common(target, pnum);
+}
+
+//-------------------------merge_new_path--------------------------------------
+// Merge the current mapping into the basic block, using a new path
+void Parse::merge_new_path(int target_bci) {
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(!target->is_ready(), "new path into frozen graph");
+ int pnum = target->add_new_path();
+ merge_common(target, pnum);
+}
+
+//-------------------------merge_exception-------------------------------------
+// Merge the current mapping into the basic block starting at bci
+// The ex_oop must be pushed on the stack, unlike throw_to_exit.
+void Parse::merge_exception(int target_bci) {
+  assert(sp() == 1, "must have only the thrown exception on the stack");
+ Block* target = successor_for_bci(target_bci);
+ if (target == NULL) { handle_missing_successor(target_bci); return; }
+ assert(target->is_handler(), "exceptions are handled by special blocks");
+ int pnum = target->add_new_path();
+ merge_common(target, pnum);
+}
+
+//--------------------handle_missing_successor---------------------------------
+void Parse::handle_missing_successor(int target_bci) {
+#ifndef PRODUCT
+ Block* b = block();
+ int trap_bci = b->flow()->has_trap()? b->flow()->trap_bci(): -1;
+ tty->print_cr("### Missing successor at bci:%d for block #%d (trap_bci:%d)", target_bci, b->pre_order(), trap_bci);
+#endif
+ ShouldNotReachHere();
+}
+
+//--------------------------merge_common---------------------------------------
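+// Example: for  x = p ? a : b;  the two control paths meet at a common bci.
+// The first arrival simply records its map (wrapping control in a fresh
+// RegionNode if needed); the second arrival fills in the region's remaining
+// control input and builds or extends a PhiNode over the stack slot holding
+// a vs. b.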
+void Parse::merge_common(Parse::Block* target, int pnum) {
+ if (TraceOptoParse) {
+ tty->print("Merging state at block #%d bci:%d", target->pre_order(), target->start());
+ }
+
+ // Zap extra stack slots to top
+ assert(sp() == target->start_sp(), "");
+ clean_stack(sp());
+
+ if (!target->is_merged()) { // No prior mapping at this bci
+ if (TraceOptoParse) { tty->print(" with empty state"); }
+
+ // If this path is dead, do not bother capturing it as a merge.
+ // It is "as if" we had 1 fewer predecessors from the beginning.
+ if (stopped()) {
+ if (TraceOptoParse) tty->print_cr(", but path is dead and doesn't count");
+ return;
+ }
+
+ // Record that a new block has been merged.
+ ++_blocks_merged;
+
+ // Make a region if we know there are multiple or unpredictable inputs.
+ // (Also, if this is a plain fall-through, we might see another region,
+ // which must not be allowed into this block's map.)
+ if (pnum > PhiNode::Input // Known multiple inputs.
+ || target->is_handler() // These have unpredictable inputs.
+ || control()->is_Region()) { // We must hide this guy.
+ // Add a Region to start the new basic block. Phis will be added
+ // later lazily.
+ int edges = target->pred_count();
+ if (edges < pnum) edges = pnum; // might be a new path!
+ Node *r = new (C, edges+1) RegionNode(edges+1);
+ gvn().set_type(r, Type::CONTROL);
+ record_for_igvn(r);
+ // zap all inputs to NULL for debugging (done in Node(uint) constructor)
+ // for (int j = 1; j < edges+1; j++) { r->init_req(j, NULL); }
+ r->init_req(pnum, control());
+ set_control(r);
+ }
+
+ // Convert the existing Parser mapping into a mapping at this bci.
+ store_state_to(target);
+ assert(target->is_merged(), "do not come here twice");
+
+ } else { // Prior mapping at this bci
+ if (TraceOptoParse) { tty->print(" with previous state"); }
+
+ // We must not manufacture more phis if the target is already parsed.
+ bool nophi = target->is_parsed();
+
+ SafePointNode* newin = map();// Hang on to incoming mapping
+ Block* save_block = block(); // Hang on to incoming block;
+ load_state_from(target); // Get prior mapping
+
+ assert(newin->jvms()->locoff() == jvms()->locoff(), "JVMS layouts agree");
+ assert(newin->jvms()->stkoff() == jvms()->stkoff(), "JVMS layouts agree");
+ assert(newin->jvms()->monoff() == jvms()->monoff(), "JVMS layouts agree");
+ assert(newin->jvms()->endoff() == jvms()->endoff(), "JVMS layouts agree");
+
+ // Iterate over my current mapping and the old mapping.
+ // Where different, insert Phi functions.
+ // Use any existing Phi functions.
+ assert(control()->is_Region(), "must be merging to a region");
+ RegionNode* r = control()->as_Region();
+
+ // Compute where to merge into
+ // Merge incoming control path
+ r->set_req(pnum, newin->control());
+
+ if (pnum == 1) { // Last merge for this Region?
+ _gvn.transform_no_reclaim(r);
+ record_for_igvn(r);
+ }
+
+ // Update all the non-control inputs to map:
+ assert(TypeFunc::Parms == newin->jvms()->locoff(), "parser map should contain only youngest jvms");
+ for (uint j = 1; j < newin->req(); j++) {
+ Node* m = map()->in(j); // Current state of target.
+ Node* n = newin->in(j); // Incoming change to target state.
+ PhiNode* phi;
+ if (m->is_Phi() && m->as_Phi()->region() == r)
+ phi = m->as_Phi();
+ else
+ phi = NULL;
+ if (m != n) { // Different; must merge
+ switch (j) {
+ // Frame pointer and Return Address never changes
+ case TypeFunc::FramePtr:// Drop m, use the original value
+ case TypeFunc::ReturnAdr:
+ break;
+ case TypeFunc::Memory: // Merge inputs to the MergeMem node
+ assert(phi == NULL, "the merge contains phis, not vice versa");
+ merge_memory_edges(n->as_MergeMem(), pnum, nophi);
+ continue;
+ default: // All normal stuff
+ if (phi == NULL) phi = ensure_phi(j, nophi);
+ break;
+ }
+ }
+ // At this point, n might be top if:
+ // - there is no phi (because TypeFlow detected a conflict), or
+    // - the corresponding control edge is top (a dead incoming path)
+ // It is a bug if we create a phi which sees a garbage value on a live path.
+
+ if (phi != NULL) {
+ assert(n != top() || r->in(pnum) == top(), "live value must not be garbage");
+ assert(phi->region() == r, "");
+ phi->set_req(pnum, n); // Then add 'n' to the merge
+ if (pnum == PhiNode::Input) {
+ // Last merge for this Phi.
+ // So far, Phis have had a reasonable type from ciTypeFlow.
+ // Now _gvn will join that with the meet of current inputs.
+ // BOTTOM is never permissible here, 'cause pessimistically
+ // Phis of pointers cannot lose the basic pointer type.
+ debug_only(const Type* bt1 = phi->bottom_type());
+ assert(bt1 != Type::BOTTOM, "should not be building conflict phis");
+ map()->set_req(j, _gvn.transform_no_reclaim(phi));
+ debug_only(const Type* bt2 = phi->bottom_type());
+ assert(bt2->higher_equal(bt1), "must be consistent with type-flow");
+ record_for_igvn(phi);
+ }
+ }
+ } // End of for all values to be merged
+
+ if (pnum == PhiNode::Input &&
+ !r->in(0)) { // The occasional useless Region
+ assert(control() == r, "");
+ set_control(r->nonnull_req());
+ }
+
+ // newin has been subsumed into the lazy merge, and is now dead.
+ set_block(save_block);
+
+ stop(); // done with this guy, for now
+ }
+
+ if (TraceOptoParse) {
+ tty->print_cr(" on path %d", pnum);
+ }
+
+ // Done with this parser state.
+ assert(stopped(), "");
+}
+
+
+//--------------------------merge_memory_edges---------------------------------
+void Parse::merge_memory_edges(MergeMemNode* n, int pnum, bool nophi) {
+ // (nophi means we must not create phis, because we already parsed here)
+ assert(n != NULL, "");
+ // Merge the inputs to the MergeMems
+ MergeMemNode* m = merged_memory();
+
+ assert(control()->is_Region(), "must be merging to a region");
+ RegionNode* r = control()->as_Region();
+
+ PhiNode* base = NULL;
+ MergeMemNode* remerge = NULL;
+ for (MergeMemStream mms(m, n); mms.next_non_empty2(); ) {
+ Node *p = mms.force_memory();
+ Node *q = mms.memory2();
+ if (mms.is_empty() && nophi) {
+ // Trouble: No new splits allowed after a loop body is parsed.
+ // Instead, wire the new split into a MergeMem on the backedge.
+ // The optimizer will sort it out, slicing the phi.
+ if (remerge == NULL) {
+ assert(base != NULL, "");
+ assert(base->in(0) != NULL, "should not be xformed away");
+ remerge = MergeMemNode::make(C, base->in(pnum));
+ gvn().set_type(remerge, Type::MEMORY);
+ base->set_req(pnum, remerge);
+ }
+ remerge->set_memory_at(mms.alias_idx(), q);
+ continue;
+ }
+ assert(!q->is_MergeMem(), "");
+ PhiNode* phi;
+ if (p != q) {
+ phi = ensure_memory_phi(mms.alias_idx(), nophi);
+ } else {
+ if (p->is_Phi() && p->as_Phi()->region() == r)
+ phi = p->as_Phi();
+ else
+ phi = NULL;
+ }
+ // Insert q into local phi
+ if (phi != NULL) {
+ assert(phi->region() == r, "");
+ p = phi;
+ phi->set_req(pnum, q);
+ if (mms.at_base_memory()) {
+ base = phi; // delay transforming it
+ } else if (pnum == 1) {
+ record_for_igvn(phi);
+ p = _gvn.transform_no_reclaim(phi);
+ }
+ mms.set_memory(p);// store back through the iterator
+ }
+ }
+ // Transform base last, in case we must fiddle with remerging.
+ if (base != NULL && pnum == 1) {
+ record_for_igvn(base);
+ m->set_base_memory( _gvn.transform_no_reclaim(base) );
+ }
+}
+
+
+//------------------------ensure_phis_everywhere-------------------------------
+void Parse::ensure_phis_everywhere() {
+ ensure_phi(TypeFunc::I_O);
+
+ // Ensure a phi on all currently known memories.
+ for (MergeMemStream mms(merged_memory()); mms.next_non_empty(); ) {
+ ensure_memory_phi(mms.alias_idx());
+ debug_only(mms.set_memory()); // keep the iterator happy
+ }
+
+ // Note: This is our only chance to create phis for memory slices.
+ // If we miss a slice that crops up later, it will have to be
+ // merged into the base-memory phi that we are building here.
+ // Later, the optimizer will comb out the knot, and build separate
+ // phi-loops for each memory slice that matters.
+
+ // Monitors must nest nicely and not get confused amongst themselves.
+ // Phi-ify everything up to the monitors, though.
+ uint monoff = map()->jvms()->monoff();
+ uint nof_monitors = map()->jvms()->nof_monitors();
+
+ assert(TypeFunc::Parms == map()->jvms()->locoff(), "parser map should contain only youngest jvms");
+ for (uint i = TypeFunc::Parms; i < monoff; i++) {
+ ensure_phi(i);
+ }
+ // Even monitors need Phis, though they are well-structured.
+ // This is true for OSR methods, and also for the rare cases where
+ // a monitor object is the subject of a replace_in_map operation.
+ // See bugs 4426707 and 5043395.
+ for (uint m = 0; m < nof_monitors; m++) {
+ ensure_phi(map()->jvms()->monitor_obj_offset(m));
+ }
+}
+
+
+//-----------------------------add_new_path------------------------------------
+// Add a previously unaccounted predecessor to this block.
+int Parse::Block::add_new_path() {
+ // If there is no map, return the lowest unused path number.
+ if (!is_merged()) return pred_count()+1; // there will be a map shortly
+
+ SafePointNode* map = start_map();
+ if (!map->control()->is_Region())
+ return pred_count()+1; // there may be a region some day
+ RegionNode* r = map->control()->as_Region();
+
+ // Add new path to the region.
+ uint pnum = r->req();
+ r->add_req(NULL);
+
+ for (uint i = 1; i < map->req(); i++) {
+ Node* n = map->in(i);
+ if (i == TypeFunc::Memory) {
+ // Ensure a phi on all currently known memories.
+ for (MergeMemStream mms(n->as_MergeMem()); mms.next_non_empty(); ) {
+ Node* phi = mms.memory();
+ if (phi->is_Phi() && phi->as_Phi()->region() == r) {
+ assert(phi->req() == pnum, "must be same size as region");
+ phi->add_req(NULL);
+ }
+ }
+ } else {
+ if (n->is_Phi() && n->as_Phi()->region() == r) {
+ assert(n->req() == pnum, "must be same size as region");
+ n->add_req(NULL);
+ }
+ }
+ }
+
+ return pnum;
+}
+
+//------------------------------ensure_phi-------------------------------------
+// Turn the idx'th entry of the current map into a Phi
+PhiNode *Parse::ensure_phi(int idx, bool nocreate) {
+ SafePointNode* map = this->map();
+ Node* region = map->control();
+ assert(region->is_Region(), "");
+
+ Node* o = map->in(idx);
+ assert(o != NULL, "");
+
+ if (o == top()) return NULL; // TOP always merges into TOP
+
+ if (o->is_Phi() && o->as_Phi()->region() == region) {
+ return o->as_Phi();
+ }
+
+ // Now use a Phi here for merging
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const JVMState* jvms = map->jvms();
+ const Type* t;
+ if (jvms->is_loc(idx)) {
+ t = block()->local_type_at(idx - jvms->locoff());
+ } else if (jvms->is_stk(idx)) {
+ t = block()->stack_type_at(idx - jvms->stkoff());
+ } else if (jvms->is_mon(idx)) {
+ assert(!jvms->is_monitor_box(idx), "no phis for boxes");
+ t = TypeInstPtr::BOTTOM; // this is sufficient for a lock object
+ } else if ((uint)idx < TypeFunc::Parms) {
+ t = o->bottom_type(); // Type::RETURN_ADDRESS or such-like.
+ } else {
+ assert(false, "no type information for this phi");
+ }
+
+ // If the type falls to bottom, then this must be a local that
+ // is mixing ints and oops or some such. Forcing it to top
+ // makes it go dead.
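+  // (Typically this is a slot that javac has reused for an int in one scope
+  // and an oop in another; rather than build a conflicting phi, the entry is
+  // killed to top.)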
+ if (t == Type::BOTTOM) {
+ map->set_req(idx, top());
+ return NULL;
+ }
+
+ // Do not create phis for top either.
+  // A top on a non-null control path must be an unused value, even after the phi.
+ if (t == Type::TOP || t == Type::HALF) {
+ map->set_req(idx, top());
+ return NULL;
+ }
+
+ PhiNode* phi = PhiNode::make(region, o, t);
+ gvn().set_type(phi, t);
+ if (DoEscapeAnalysis) record_for_igvn(phi);
+ map->set_req(idx, phi);
+ return phi;
+}
+
+//--------------------------ensure_memory_phi----------------------------------
+// Turn the idx'th slice of the current memory into a Phi
+PhiNode *Parse::ensure_memory_phi(int idx, bool nocreate) {
+ MergeMemNode* mem = merged_memory();
+ Node* region = control();
+ assert(region->is_Region(), "");
+
+ Node *o = (idx == Compile::AliasIdxBot)? mem->base_memory(): mem->memory_at(idx);
+ assert(o != NULL && o != top(), "");
+
+ PhiNode* phi;
+ if (o->is_Phi() && o->as_Phi()->region() == region) {
+ phi = o->as_Phi();
+ if (phi == mem->base_memory() && idx >= Compile::AliasIdxRaw) {
+ // clone the shared base memory phi to make a new memory split
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const Type* t = phi->bottom_type();
+ const TypePtr* adr_type = C->get_adr_type(idx);
+ phi = phi->slice_memory(adr_type);
+ gvn().set_type(phi, t);
+ }
+ return phi;
+ }
+
+ // Now use a Phi here for merging
+ assert(!nocreate, "Cannot build a phi for a block already parsed.");
+ const Type* t = o->bottom_type();
+ const TypePtr* adr_type = C->get_adr_type(idx);
+ phi = PhiNode::make(region, o, t, adr_type);
+ gvn().set_type(phi, t);
+ if (idx == Compile::AliasIdxBot)
+ mem->set_base_memory(phi);
+ else
+ mem->set_memory_at(idx, phi);
+ return phi;
+}
+
+//------------------------------call_register_finalizer-----------------------
+// Check the klass of the receiver and call register_finalizer if the
+// class needs finalization.
+void Parse::call_register_finalizer() {
+ Node* receiver = local(0);
+ assert(receiver != NULL && receiver->bottom_type()->isa_instptr() != NULL,
+ "must have non-null instance type");
+
+ const TypeInstPtr *tinst = receiver->bottom_type()->isa_instptr();
+ if (tinst != NULL && tinst->klass()->is_loaded() && !tinst->klass_is_exact()) {
+ // The type isn't known exactly so see if CHA tells us anything.
+ ciInstanceKlass* ik = tinst->klass()->as_instance_klass();
+ if (!Dependencies::has_finalizable_subclass(ik)) {
+ // No finalizable subclasses so skip the dynamic check.
+ C->dependencies()->assert_has_no_finalizable_subclasses(ik);
+ return;
+ }
+ }
+
+ // Insert a dynamic test for whether the instance needs
+ // finalization. In general this will fold up since the concrete
+ // class is often visible so the access flags are constant.
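+  // Roughly, the code built below is:
+  //   if ((receiver->klass()->access_flags() & JVM_ACC_HAS_FINALIZER) != 0)
+  //     runtime call: register_finalizer(receiver);   // slow path, may throw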
+ Node* klass_addr = basic_plus_adr( receiver, receiver, oopDesc::klass_offset_in_bytes() );
+ Node* klass = _gvn.transform(new (C, 3) LoadKlassNode(NULL, immutable_memory(), klass_addr, TypeInstPtr::KLASS));
+
+ Node* access_flags_addr = basic_plus_adr(klass, klass, Klass::access_flags_offset_in_bytes() + sizeof(oopDesc));
+ Node* access_flags = make_load(NULL, access_flags_addr, TypeInt::INT, T_INT);
+
+ Node* mask = _gvn.transform(new (C, 3) AndINode(access_flags, intcon(JVM_ACC_HAS_FINALIZER)));
+ Node* check = _gvn.transform(new (C, 3) CmpINode(mask, intcon(0)));
+ Node* test = _gvn.transform(new (C, 2) BoolNode(check, BoolTest::ne));
+
+ IfNode* iff = create_and_map_if(control(), test, PROB_MAX, COUNT_UNKNOWN);
+
+ RegionNode* result_rgn = new (C, 3) RegionNode(3);
+ record_for_igvn(result_rgn);
+
+ Node *skip_register = _gvn.transform(new (C, 1) IfFalseNode(iff));
+ result_rgn->init_req(1, skip_register);
+
+ Node *needs_register = _gvn.transform(new (C, 1) IfTrueNode(iff));
+ set_control(needs_register);
+ if (stopped()) {
+ // There is no slow path.
+ result_rgn->init_req(2, top());
+ } else {
+ Node *call = make_runtime_call(RC_NO_LEAF,
+ OptoRuntime::register_finalizer_Type(),
+ OptoRuntime::register_finalizer_Java(),
+ NULL, TypePtr::BOTTOM,
+ receiver);
+ make_slow_call_ex(call, env()->Throwable_klass(), true);
+
+ Node* fast_io = call->in(TypeFunc::I_O);
+ Node* fast_mem = call->in(TypeFunc::Memory);
+    // These two phis are pre-filled with copies of the fast IO and Memory
+ Node* io_phi = PhiNode::make(result_rgn, fast_io, Type::ABIO);
+ Node* mem_phi = PhiNode::make(result_rgn, fast_mem, Type::MEMORY, TypePtr::BOTTOM);
+
+ result_rgn->init_req(2, control());
+ io_phi ->init_req(2, i_o());
+ mem_phi ->init_req(2, reset_memory());
+
+ set_all_memory( _gvn.transform(mem_phi) );
+ set_i_o( _gvn.transform(io_phi) );
+ }
+
+ set_control( _gvn.transform(result_rgn) );
+}
+
+//------------------------------return_current---------------------------------
+// Append current _map to _exit_return
+void Parse::return_current(Node* value) {
+ if (RegisterFinalizersAtInit &&
+ method()->intrinsic_id() == vmIntrinsics::_Object_init) {
+ call_register_finalizer();
+ }
+
+ // Do not set_parse_bci, so that return goo is credited to the return insn.
+ set_bci(InvocationEntryBci);
+ if (method()->is_synchronized() && GenerateSynchronizationCode) {
+ shared_unlock(_synch_lock->box_node(), _synch_lock->obj_node());
+ }
+ if (DTraceMethodProbes) {
+ make_dtrace_method_exit(method());
+ }
+ SafePointNode* exit_return = _exits.map();
+ exit_return->in( TypeFunc::Control )->add_req( control() );
+ exit_return->in( TypeFunc::I_O )->add_req( i_o () );
+ Node *mem = exit_return->in( TypeFunc::Memory );
+ for (MergeMemStream mms(mem->as_MergeMem(), merged_memory()); mms.next_non_empty2(); ) {
+ if (mms.is_empty()) {
+ // get a copy of the base memory, and patch just this one input
+ const TypePtr* adr_type = mms.adr_type(C);
+ Node* phi = mms.force_memory()->as_Phi()->slice_memory(adr_type);
+ assert(phi->as_Phi()->region() == mms.base_memory()->in(0), "");
+ gvn().set_type_bottom(phi);
+ phi->del_req(phi->req()-1); // prepare to re-patch
+ mms.set_memory(phi);
+ }
+ mms.memory()->add_req(mms.memory2());
+ }
+
+ // frame pointer is always same, already captured
+ if (value != NULL) {
+ // If returning oops to an interface-return, there is a silent free
+ // cast from oop to interface allowed by the Verifier. Make it explicit
+ // here.
+ Node* phi = _exits.argument(0);
+ const TypeInstPtr *tr = phi->bottom_type()->isa_instptr();
+ if( tr && tr->klass()->is_loaded() &&
+ tr->klass()->is_interface() ) {
+ const TypeInstPtr *tp = value->bottom_type()->isa_instptr();
+ if (tp && tp->klass()->is_loaded() &&
+ !tp->klass()->is_interface()) {
+ // sharpen the type eagerly; this eases certain assert checking
+ if (tp->higher_equal(TypeInstPtr::NOTNULL))
+ tr = tr->join(TypeInstPtr::NOTNULL)->is_instptr();
+ value = _gvn.transform(new (C, 2) CheckCastPPNode(0,value,tr));
+ }
+ }
+ phi->add_req(value);
+ }
+
+ stop_and_kill_map(); // This CFG path dies here
+}
+
+
+//------------------------------add_safepoint----------------------------------
+void Parse::add_safepoint() {
+ // See if we can avoid this safepoint. No need for a SafePoint immediately
+ // after a Call (except Leaf Call) or another SafePoint.
+ Node *proj = control();
+ bool add_poll_param = SafePointNode::needs_polling_address_input();
+ uint parms = add_poll_param ? TypeFunc::Parms+1 : TypeFunc::Parms;
+ if( proj->is_Proj() ) {
+ Node *n0 = proj->in(0);
+ if( n0->is_Catch() ) {
+ n0 = n0->in(0)->in(0);
+ assert( n0->is_Call(), "expect a call here" );
+ }
+ if( n0->is_Call() ) {
+ if( n0->as_Call()->guaranteed_safepoint() )
+ return;
+ } else if( n0->is_SafePoint() && n0->req() >= parms ) {
+ return;
+ }
+ }
+
+ // Clear out dead values from the debug info.
+ kill_dead_locals();
+
+ // Clone the JVM State
+ SafePointNode *sfpnt = new (C, parms) SafePointNode(parms, NULL);
+
+ // Capture memory state BEFORE a SafePoint. Since we can block at a
+ // SafePoint we need our GC state to be safe; i.e. we need all our current
+ // write barriers (card marks) to not float down after the SafePoint so we
+ // must read raw memory. Likewise we need all oop stores to match the card
+ // marks. If deopt can happen, we need ALL stores (we need the correct JVM
+ // state on a deopt).
+
+ // We do not need to WRITE the memory state after a SafePoint. The control
+ // edge will keep card-marks and oop-stores from floating up from below a
+ // SafePoint and our true dependency added here will keep them from floating
+ // down below a SafePoint.
+
+ // Clone the current memory state
+ Node* mem = MergeMemNode::make(C, map()->memory());
+
+ mem = _gvn.transform(mem);
+
+ // Pass control through the safepoint
+ sfpnt->init_req(TypeFunc::Control , control());
+ // Fix edges normally used by a call
+ sfpnt->init_req(TypeFunc::I_O , top() );
+ sfpnt->init_req(TypeFunc::Memory , mem );
+ sfpnt->init_req(TypeFunc::ReturnAdr, top() );
+ sfpnt->init_req(TypeFunc::FramePtr , top() );
+
+ // Create a node for the polling address
+ if( add_poll_param ) {
+ Node *polladr = ConPNode::make(C, (address)os::get_polling_page());
+ sfpnt->init_req(TypeFunc::Parms+0, _gvn.transform(polladr));
+ }
+
+ // Fix up the JVM State edges
+ add_safepoint_edges(sfpnt);
+ Node *transformed_sfpnt = _gvn.transform(sfpnt);
+ set_control(transformed_sfpnt);
+
+ // Provide an edge from root to safepoint. This makes the safepoint
+ // appear useful until the parse has completed.
+ if( OptoRemoveUseless && transformed_sfpnt->is_SafePoint() ) {
+ assert(C->root() != NULL, "Expect parse is still valid");
+ C->root()->add_prec(transformed_sfpnt);
+ }
+}
+
+#ifndef PRODUCT
+//------------------------show_parse_info--------------------------------------
+void Parse::show_parse_info() {
+ InlineTree* ilt = NULL;
+ if (C->ilt() != NULL) {
+ JVMState* caller_jvms = is_osr_parse() ? caller()->caller() : caller();
+ ilt = InlineTree::find_subtree_from_root(C->ilt(), caller_jvms, method());
+ }
+ if (PrintCompilation && Verbose) {
+ if (depth() == 1) {
+ if( ilt->count_inlines() ) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ tty->cr();
+ }
+ } else {
+ if (method()->is_synchronized()) tty->print("s");
+ if (method()->has_exception_handlers()) tty->print("!");
+ // Check this is not the final compiled version
+ if (C->trap_can_recompile()) {
+ tty->print("-");
+ } else {
+ tty->print(" ");
+ }
+ method()->print_short_name();
+ if (is_osr_parse()) {
+ tty->print(" @ %d", osr_bci());
+ }
+ tty->print(" (%d bytes)",method()->code_size());
+ if (ilt->count_inlines()) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ }
+ tty->cr();
+ }
+ }
+ if (PrintOpto && (depth() == 1 || PrintOptoInlining)) {
+ // Print that we succeeded; suppress this message on the first osr parse.
+
+ if (method()->is_synchronized()) tty->print("s");
+ if (method()->has_exception_handlers()) tty->print("!");
+ // Check this is not the final compiled version
+ if (C->trap_can_recompile() && depth() == 1) {
+ tty->print("-");
+ } else {
+ tty->print(" ");
+ }
+ if( depth() != 1 ) { tty->print(" "); } // missing compile count
+ for (int i = 1; i < depth(); ++i) { tty->print(" "); }
+ method()->print_short_name();
+ if (is_osr_parse()) {
+ tty->print(" @ %d", osr_bci());
+ }
+ if (ilt->caller_bci() != -1) {
+ tty->print(" @ %d", ilt->caller_bci());
+ }
+ tty->print(" (%d bytes)",method()->code_size());
+ if (ilt->count_inlines()) {
+ tty->print(" __inlined %d (%d bytes)", ilt->count_inlines(),
+ ilt->count_inline_bcs());
+ }
+ tty->cr();
+ }
+}
+
+
+//------------------------------dump-------------------------------------------
+// Dump information associated with the bytecodes of current _method
+void Parse::dump() {
+ if( method() != NULL ) {
+ // Iterate over bytecodes
+ ciBytecodeStream iter(method());
+ for( Bytecodes::Code bc = iter.next(); bc != ciBytecodeStream::EOBC() ; bc = iter.next() ) {
+ dump_bci( iter.cur_bci() );
+ tty->cr();
+ }
+ }
+}
+
+// Dump information associated with a byte code index, 'bci'
+void Parse::dump_bci(int bci) {
+ // Output info on merge-points, cloning, and within _jsr..._ret
+ // NYI
+ tty->print(" bci:%d", bci);
+}
+
+#endif
diff --git a/src/share/vm/opto/parse2.cpp b/src/share/vm/opto/parse2.cpp
new file mode 100644
index 000000000..2a5b0da07
--- /dev/null
+++ b/src/share/vm/opto/parse2.cpp
@@ -0,0 +1,2171 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse2.cpp.incl"
+
+extern int explicit_null_checks_inserted,
+ explicit_null_checks_elided;
+
+//---------------------------------array_load----------------------------------
+void Parse::array_load(BasicType elem_type) {
+ const Type* elem = Type::TOP;
+ Node* adr = array_addressing(elem_type, 0, &elem);
+  if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
+ Node* ld = make_load(control(), adr, elem, elem_type, adr_type);
+ push(ld);
+}
+
+
+//--------------------------------array_store----------------------------------
+void Parse::array_store(BasicType elem_type) {
+ Node* adr = array_addressing(elem_type, 1);
+  if (stopped()) return; // guaranteed null or range check
+ Node* val = pop();
+ _sp -= 2; // Pop array and index
+ const TypeAryPtr* adr_type = TypeAryPtr::get_array_body_type(elem_type);
+ store_to_memory(control(), adr, val, elem_type, adr_type);
+}
+
+
+//------------------------------array_addressing-------------------------------
+// Pull array and index from the stack. Compute pointer-to-element.
+Node* Parse::array_addressing(BasicType type, int vals, const Type* *result2) {
+ Node *idx = peek(0+vals); // Get from stack without popping
+ Node *ary = peek(1+vals); // in case of exception
+
+ // Null check the array base, with correct stack contents
+ ary = do_null_check(ary, T_ARRAY);
+ // Compile-time detect of null-exception?
+ if (stopped()) return top();
+
+ const TypeAryPtr* arytype = _gvn.type(ary)->is_aryptr();
+ const TypeInt* sizetype = arytype->size();
+ const Type* elemtype = arytype->elem();
+
+ if (UseUniqueSubclasses && result2 != NULL) {
+ const TypeInstPtr* toop = elemtype->isa_instptr();
+ if (toop) {
+ if (toop->klass()->as_instance_klass()->unique_concrete_subklass()) {
+ // If we load from "AbstractClass[]" we must see "ConcreteSubClass".
+ const Type* subklass = Type::get_const_type(toop->klass());
+ elemtype = subklass->join(elemtype);
+ }
+ }
+ }
+
+ // Check for big class initializers with all constant offsets
+ // feeding into a known-size array.
+ const TypeInt* idxtype = _gvn.type(idx)->is_int();
+ // See if the highest idx value is less than the lowest array bound,
+ // and if the idx value cannot be negative:
+ bool need_range_check = true;
+ if (idxtype->_hi < sizetype->_lo && idxtype->_lo >= 0) {
+ need_range_check = false;
+ if (C->log() != NULL) C->log()->elem("observe that='!need_range_check'");
+ }
+
+ if (!arytype->klass()->is_loaded()) {
+ // Only fails for some -Xcomp runs
+ // The class is unloaded. We have to run this bytecode in the interpreter.
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ arytype->klass(), "!loaded array");
+ return top();
+ }
+
+ // Do the range check
+ if (GenerateRangeChecks && need_range_check) {
+ // Range is constant in array-oop, so we can use the original state of mem
+ Node* len = load_array_length(ary);
+ // Test length vs index (standard trick using unsigned compare)
+ Node* chk = _gvn.transform( new (C, 3) CmpUNode(idx, len) );
+ BoolTest::mask btest = BoolTest::lt;
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(chk, btest) );
+ // Branch to failure if out of bounds
+ { BuildCutout unless(this, tst, PROB_MAX);
+ if (C->allow_range_check_smearing()) {
+ // Do not use builtin_throw, since range checks are sometimes
+ // made more stringent by an optimistic transformation.
+ // This creates "tentative" range checks at this point,
+ // which are not guaranteed to throw exceptions.
+ // See IfNode::Ideal, is_range_check, adjust_check.
+ uncommon_trap(Deoptimization::Reason_range_check,
+ Deoptimization::Action_make_not_entrant,
+ NULL, "range_check");
+ } else {
+ // If we have already recompiled with the range-check-widening
+ // heroic optimization turned off, then we must really be throwing
+ // range check exceptions.
+ builtin_throw(Deoptimization::Reason_range_check, idx);
+ }
+ }
+ }
+ // Check for always knowing you are throwing a range-check exception
+ if (stopped()) return top();
+
+ Node* ptr = array_element_address( ary, idx, type, sizetype);
+
+ if (result2 != NULL) *result2 = elemtype;
+ return ptr;
+}
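// Editorial sketch (not part of the patch, assumes two's-complement int): the
// "standard trick using unsigned compare" from the range check above. Casting
// a signed index to unsigned makes negative values wrap to huge numbers, so a
// single CmpU/BoolTest::lt test covers both idx < 0 and idx >= length.
#include <cassert>

static bool index_in_bounds(int idx, int len) {
  return (unsigned int)idx < (unsigned int)len;   // mirrors CmpU(idx, len), lt
}

int main() {
  assert( index_in_bounds(3, 10));
  assert(!index_in_bounds(-1, 10));   // -1 wraps to 0xFFFFFFFF, fails the test
  assert(!index_in_bounds(10, 10));   // == length also fails
  return 0;
}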
+
+
+// returns IfNode
+IfNode* Parse::jump_if_fork_int(Node* a, Node* b, BoolTest::mask mask) {
+ Node *cmp = _gvn.transform( new (C, 3) CmpINode( a, b)); // two cases: shiftcount > 32 and shiftcount <= 32
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( cmp, mask));
+ IfNode *iff = create_and_map_if( control(), tst, ((mask == BoolTest::eq) ? PROB_STATIC_INFREQUENT : PROB_FAIR), COUNT_UNKNOWN );
+ return iff;
+}
+
+// return Region node
+Node* Parse::jump_if_join(Node* iffalse, Node* iftrue) {
+ Node *region = new (C, 3) RegionNode(3); // 2 results
+ record_for_igvn(region);
+ region->init_req(1, iffalse);
+ region->init_req(2, iftrue );
+ _gvn.set_type(region, Type::CONTROL);
+ region = _gvn.transform(region);
+ set_control (region);
+ return region;
+}
+
+
+//------------------------------helper for tableswitch-------------------------
+void Parse::jump_if_true_fork(IfNode *iff, int dest_bci_if_true, int prof_table_index) {
+ // True branch, use existing map info
+ { PreserveJVMState pjvms(this);
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode (iff) );
+ set_control( iftrue );
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci_if_true);
+ }
+
+ // False branch
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control( iffalse );
+}
+
+void Parse::jump_if_false_fork(IfNode *iff, int dest_bci_if_true, int prof_table_index) {
+ // True branch, use existing map info
+ { PreserveJVMState pjvms(this);
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode (iff) );
+ set_control( iffalse );
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci_if_true);
+ }
+
+ // False branch
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff) );
+ set_control( iftrue );
+}
+
+void Parse::jump_if_always_fork(int dest_bci, int prof_table_index) {
+ // False branch, use existing map and control()
+ profile_switch_case(prof_table_index);
+ merge_new_path(dest_bci);
+}
+
+
+extern "C" {
+ static int jint_cmp(const void *i, const void *j) {
+ int a = *(jint *)i;
+ int b = *(jint *)j;
+ return a > b ? 1 : a < b ? -1 : 0;
+ }
+}
+
+
+// Default value for methodData switch indexing. Must be a negative value to avoid
+// conflict with any legal switch index.
+#define NullTableIndex -1
+
+class SwitchRange : public StackObj {
+ // a range of integers coupled with a bci destination
+ jint _lo; // inclusive lower limit
+ jint _hi; // inclusive upper limit
+ int _dest;
+ int _table_index; // index into method data table
+
+public:
+ jint lo() const { return _lo; }
+ jint hi() const { return _hi; }
+ int dest() const { return _dest; }
+ int table_index() const { return _table_index; }
+ bool is_singleton() const { return _lo == _hi; }
+
+ void setRange(jint lo, jint hi, int dest, int table_index) {
+ assert(lo <= hi, "must be a non-empty range");
+ _lo = lo, _hi = hi; _dest = dest; _table_index = table_index;
+ }
+ bool adjoinRange(jint lo, jint hi, int dest, int table_index) {
+ assert(lo <= hi, "must be a non-empty range");
+ if (lo == _hi+1 && dest == _dest && table_index == _table_index) {
+ _hi = hi;
+ return true;
+ }
+ return false;
+ }
+
+ void set (jint value, int dest, int table_index) {
+ setRange(value, value, dest, table_index);
+ }
+ bool adjoin(jint value, int dest, int table_index) {
+ return adjoinRange(value, value, dest, table_index);
+ }
+
+ void print(ciEnv* env) {
+ if (is_singleton())
+ tty->print(" {%d}=>%d", lo(), dest());
+ else if (lo() == min_jint)
+ tty->print(" {..%d}=>%d", hi(), dest());
+ else if (hi() == max_jint)
+ tty->print(" {%d..}=>%d", lo(), dest());
+ else
+ tty->print(" {%d..%d}=>%d", lo(), hi(), dest());
+ }
+};
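// Editorial sketch (not part of the patch): how adjoin()/adjoinRange() let the
// switch lowering coalesce consecutive case values that share a destination,
// so "case 1: case 2: case 3:" jumping to one bci becomes a single {1..3} range.
#include <cassert>
#include <vector>

struct Rng { int lo, hi, dest; };   // inclusive range -> destination bci

// Append (value -> dest), merging with the previous range when the value is
// contiguous and goes to the same destination.
static void add_case(std::vector<Rng>& rs, int value, int dest) {
  if (!rs.empty() && rs.back().hi + 1 == value && rs.back().dest == dest) {
    rs.back().hi = value;           // adjoin succeeded
  } else {
    rs.push_back(Rng{value, value, dest});
  }
}

int main() {
  std::vector<Rng> rs;
  add_case(rs, 1, 100); add_case(rs, 2, 100); add_case(rs, 3, 100);
  add_case(rs, 4, 200);
  assert(rs.size() == 2 && rs[0].lo == 1 && rs[0].hi == 3 && rs[1].dest == 200);
  return 0;
}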
+
+
+//-------------------------------do_tableswitch--------------------------------
+void Parse::do_tableswitch() {
+ Node* lookup = pop();
+
+ // Get information about tableswitch
+ int default_dest = iter().get_dest_table(0);
+ int lo_index = iter().get_int_table(1);
+ int hi_index = iter().get_int_table(2);
+ int len = hi_index - lo_index + 1;
+
+ if (len < 1) {
+ // If this is a backward branch, add safepoint
+ maybe_add_safepoint(default_dest);
+ merge(default_dest);
+ return;
+ }
+
+ // generate decision tree, using trichotomy when possible
+ int rnum = len+2;
+ bool makes_backward_branch = false;
+ SwitchRange* ranges = NEW_RESOURCE_ARRAY(SwitchRange, rnum);
+ int rp = -1;
+ if (lo_index != min_jint) {
+ ranges[++rp].setRange(min_jint, lo_index-1, default_dest, NullTableIndex);
+ }
+ for (int j = 0; j < len; j++) {
+ jint match_int = lo_index+j;
+ int dest = iter().get_dest_table(j+3);
+ makes_backward_branch |= (dest <= bci());
+ int table_index = method_data_update() ? j : NullTableIndex;
+ if (rp < 0 || !ranges[rp].adjoin(match_int, dest, table_index)) {
+ ranges[++rp].set(match_int, dest, table_index);
+ }
+ }
+ jint highest = lo_index+(len-1);
+ assert(ranges[rp].hi() == highest, "");
+ if (highest != max_jint
+ && !ranges[rp].adjoinRange(highest+1, max_jint, default_dest, NullTableIndex)) {
+ ranges[++rp].setRange(highest+1, max_jint, default_dest, NullTableIndex);
+ }
+ assert(rp < len+2, "not too many ranges");
+
+ // Safepoint in case backward branch observed
+ if( makes_backward_branch && UseLoopSafepoints )
+ add_safepoint();
+
+ jump_switch_ranges(lookup, &ranges[0], &ranges[rp]);
+}
+
+
+//------------------------------do_lookupswitch--------------------------------
+void Parse::do_lookupswitch() {
+ Node *lookup = pop(); // lookup value
+ // Get information about lookupswitch
+ int default_dest = iter().get_dest_table(0);
+ int len = iter().get_int_table(1);
+
+ if (len < 1) { // If this is a backward branch, add safepoint
+ maybe_add_safepoint(default_dest);
+ merge(default_dest);
+ return;
+ }
+
+ // generate decision tree, using trichotomy when possible
+ jint* table = NEW_RESOURCE_ARRAY(jint, len*2);
+ {
+ for( int j = 0; j < len; j++ ) {
+ table[j+j+0] = iter().get_int_table(2+j+j);
+ table[j+j+1] = iter().get_dest_table(2+j+j+1);
+ }
+ qsort( table, len, 2*sizeof(table[0]), jint_cmp );
+ }
+
+ int rnum = len*2+1;
+ bool makes_backward_branch = false;
+ SwitchRange* ranges = NEW_RESOURCE_ARRAY(SwitchRange, rnum);
+ int rp = -1;
+ for( int j = 0; j < len; j++ ) {
+ jint match_int = table[j+j+0];
+ int dest = table[j+j+1];
+ int next_lo = rp < 0 ? min_jint : ranges[rp].hi()+1;
+ int table_index = method_data_update() ? j : NullTableIndex;
+ makes_backward_branch |= (dest <= bci());
+ if( match_int != next_lo ) {
+ ranges[++rp].setRange(next_lo, match_int-1, default_dest, NullTableIndex);
+ }
+ if( rp < 0 || !ranges[rp].adjoin(match_int, dest, table_index) ) {
+ ranges[++rp].set(match_int, dest, table_index);
+ }
+ }
+ jint highest = table[2*(len-1)];
+ assert(ranges[rp].hi() == highest, "");
+ if( highest != max_jint
+ && !ranges[rp].adjoinRange(highest+1, max_jint, default_dest, NullTableIndex) ) {
+ ranges[++rp].setRange(highest+1, max_jint, default_dest, NullTableIndex);
+ }
+ assert(rp < rnum, "not too many ranges");
+
+ // Safepoint in case backward branch observed
+ if( makes_backward_branch && UseLoopSafepoints )
+ add_safepoint();
+
+ jump_switch_ranges(lookup, &ranges[0], &ranges[rp]);
+}
+
+//----------------------------create_jump_tables-------------------------------
+bool Parse::create_jump_tables(Node* key_val, SwitchRange* lo, SwitchRange* hi) {
+ // Are jumptables enabled
+ if (!UseJumpTables) return false;
+
+ // Are jumptables supported
+ if (!Matcher::has_match_rule(Op_Jump)) return false;
+
+ // Don't make jump table if profiling
+ if (method_data_update()) return false;
+
+ // Decide if a guard is needed to lop off big ranges at either (or
+ // both) end(s) of the input set. We'll call this the default target
+ // even though we can't be sure that it is the true "default".
+
+ bool needs_guard = false;
+ int default_dest;
+ int64 total_outlier_size = 0;
+ int64 hi_size = ((int64)hi->hi()) - ((int64)hi->lo()) + 1;
+ int64 lo_size = ((int64)lo->hi()) - ((int64)lo->lo()) + 1;
+
+ if (lo->dest() == hi->dest()) {
+ total_outlier_size = hi_size + lo_size;
+ default_dest = lo->dest();
+ } else if (lo_size > hi_size) {
+ total_outlier_size = lo_size;
+ default_dest = lo->dest();
+ } else {
+ total_outlier_size = hi_size;
+ default_dest = hi->dest();
+ }
+
+ // If a guard test will eliminate very sparse end ranges, then
+ // it is worth the cost of an extra jump.
+ if (total_outlier_size > (MaxJumpTableSparseness * 4)) {
+ needs_guard = true;
+ if (default_dest == lo->dest()) lo++;
+ if (default_dest == hi->dest()) hi--;
+ }
+
+ // Find the total number of cases and ranges
+ int64 num_cases = ((int64)hi->hi()) - ((int64)lo->lo()) + 1;
+ int num_range = hi - lo + 1;
+
+ // Don't create table if: too large, too small, or too sparse.
+ if (num_cases < MinJumpTableSize || num_cases > MaxJumpTableSize)
+ return false;
+ if (num_cases > (MaxJumpTableSparseness * num_range))
+ return false;
+
+ // Normalize table lookups to zero
+ int lowval = lo->lo();
+ key_val = _gvn.transform( new (C, 3) SubINode(key_val, _gvn.intcon(lowval)) );
+
+ // Generate a guard to protect against input keyvals that aren't
+ // in the switch domain.
+ if (needs_guard) {
+ Node* size = _gvn.intcon(num_cases);
+ Node* cmp = _gvn.transform( new (C, 3) CmpUNode(key_val, size) );
+ Node* tst = _gvn.transform( new (C, 2) BoolNode(cmp, BoolTest::ge) );
+ IfNode* iff = create_and_map_if( control(), tst, PROB_FAIR, COUNT_UNKNOWN);
+ jump_if_true_fork(iff, default_dest, NullTableIndex);
+ }
+
+ // Create an ideal node JumpTable that has projections
+ // of all possible ranges for a switch statement
+ // The key_val input must be converted to a pointer offset and scaled.
+ // Compare Parse::array_addressing above.
+#ifdef _LP64
+ // Clean the 32-bit int into a real 64-bit offset.
+ // Otherwise, the jint value 0 might turn into an offset of 0x0800000000.
+ const TypeLong* lkeytype = TypeLong::make(CONST64(0), num_cases-1, Type::WidenMin);
+ key_val = _gvn.transform( new (C, 2) ConvI2LNode(key_val, lkeytype) );
+#endif
+ // Shift the value by wordsize so we have an index into the table, rather
+ // than a switch value
+ Node *shiftWord = _gvn.MakeConX(wordSize);
+ key_val = _gvn.transform( new (C, 3) MulXNode( key_val, shiftWord));
+
+ // Create the JumpNode
+ Node* jtn = _gvn.transform( new (C, 2) JumpNode(control(), key_val, num_cases) );
+
+ // These are the switch destinations hanging off the jumpnode
+ int i = 0;
+ for (SwitchRange* r = lo; r <= hi; r++) {
+ for (int j = r->lo(); j <= r->hi(); j++, i++) {
+ Node* input = _gvn.transform(new (C, 1) JumpProjNode(jtn, i, r->dest(), j - lowval));
+ {
+ PreserveJVMState pjvms(this);
+ set_control(input);
+ jump_if_always_fork(r->dest(), r->table_index());
+ }
+ }
+ }
+ assert(i == num_cases, "miscount of cases");
+ stop_and_kill_map(); // no more uses for this JVMS
+ return true;
+}
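// Editorial sketch (not part of the patch): the size/density test above, with
// hypothetical stand-ins for the MinJumpTableSize / MaxJumpTableSize /
// MaxJumpTableSparseness flags (the real defaults may differ).
#include <cstdint>
#include <cstdio>

static const int64_t kMinSize    = 18;      // stand-in for MinJumpTableSize
static const int64_t kMaxSize    = 65000;   // stand-in for MaxJumpTableSize
static const int64_t kSparseness = 5;       // stand-in for MaxJumpTableSparseness

// lo_key..hi_key is the span of case values left after the optional guard has
// lopped off sparse end ranges; num_range counts the SwitchRange entries in it.
static bool worth_a_jump_table(int64_t lo_key, int64_t hi_key, int64_t num_range) {
  int64_t num_cases = hi_key - lo_key + 1;
  if (num_cases < kMinSize || num_cases > kMaxSize) return false;  // too small/large
  if (num_cases > kSparseness * num_range)          return false;  // too sparse
  return true;
}

int main() {
  printf("%d\n", (int)worth_a_jump_table(0, 99, 50));  // dense enough: 1
  printf("%d\n", (int)worth_a_jump_table(0, 99, 10));  // too sparse:   0
  return 0;
}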
+
+//----------------------------jump_switch_ranges-------------------------------
+void Parse::jump_switch_ranges(Node* key_val, SwitchRange *lo, SwitchRange *hi, int switch_depth) {
+ Block* switch_block = block();
+
+ if (switch_depth == 0) {
+ // Do special processing for the top-level call.
+ assert(lo->lo() == min_jint, "initial range must exhaust Type::INT");
+ assert(hi->hi() == max_jint, "initial range must exhaust Type::INT");
+
+ // Decrement pred-numbers for the unique set of nodes.
+#ifdef ASSERT
+ // Ensure that the block's successors are a (duplicate-free) set.
+ int successors_counted = 0; // block occurrences in [lo..hi]
+ int unique_successors = switch_block->num_successors();
+ for (int i = 0; i < unique_successors; i++) {
+ Block* target = switch_block->successor_at(i);
+
+ // Check that the set of successors is the same in both places.
+ int successors_found = 0;
+ for (SwitchRange* p = lo; p <= hi; p++) {
+ if (p->dest() == target->start()) successors_found++;
+ }
+ assert(successors_found > 0, "successor must be known");
+ successors_counted += successors_found;
+ }
+ assert(successors_counted == (hi-lo)+1, "no unexpected successors");
+#endif
+
+ // Maybe prune the inputs, based on the type of key_val.
+ jint min_val = min_jint;
+ jint max_val = max_jint;
+ const TypeInt* ti = key_val->bottom_type()->isa_int();
+ if (ti != NULL) {
+ min_val = ti->_lo;
+ max_val = ti->_hi;
+ assert(min_val <= max_val, "invalid int type");
+ }
+ while (lo->hi() < min_val) lo++;
+ if (lo->lo() < min_val) lo->setRange(min_val, lo->hi(), lo->dest(), lo->table_index());
+ while (hi->lo() > max_val) hi--;
+ if (hi->hi() > max_val) hi->setRange(hi->lo(), max_val, hi->dest(), hi->table_index());
+ }
+
+#ifndef PRODUCT
+ if (switch_depth == 0) {
+ _max_switch_depth = 0;
+ _est_switch_depth = log2_intptr((hi-lo+1)-1)+1;
+ }
+#endif
+
+ assert(lo <= hi, "must be a non-empty set of ranges");
+ if (lo == hi) {
+ jump_if_always_fork(lo->dest(), lo->table_index());
+ } else {
+ assert(lo->hi() == (lo+1)->lo()-1, "contiguous ranges");
+ assert(hi->lo() == (hi-1)->hi()+1, "contiguous ranges");
+
+ if (create_jump_tables(key_val, lo, hi)) return;
+
+ int nr = hi - lo + 1;
+
+ SwitchRange* mid = lo + nr/2;
+ // if there is an easy choice, pivot at a singleton:
+ if (nr > 3 && !mid->is_singleton() && (mid-1)->is_singleton()) mid--;
+
+ assert(lo < mid && mid <= hi, "good pivot choice");
+ assert(nr != 2 || mid == hi, "should pick higher of 2");
+ assert(nr != 3 || mid == hi-1, "should pick middle of 3");
+
+ Node *test_val = _gvn.intcon(mid->lo());
+
+ if (mid->is_singleton()) {
+ IfNode *iff_ne = jump_if_fork_int(key_val, test_val, BoolTest::ne);
+ jump_if_false_fork(iff_ne, mid->dest(), mid->table_index());
+
+ // Special Case: If there are exactly three ranges, and the high
+ // and low range each go to the same place, omit the "gt" test,
+ // since it will not discriminate anything.
+ bool eq_test_only = (hi == lo+2 && hi->dest() == lo->dest());
+ if (eq_test_only) {
+ assert(mid == hi-1, "");
+ }
+
+ // if there is a higher range, test for it and process it:
+ if (mid < hi && !eq_test_only) {
+ // two comparisons of same values--should enable 1 test for 2 branches
+ // Use BoolTest::le instead of BoolTest::gt
+ IfNode *iff_le = jump_if_fork_int(key_val, test_val, BoolTest::le);
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff_le) );
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff_le) );
+ { PreserveJVMState pjvms(this);
+ set_control(iffalse);
+ jump_switch_ranges(key_val, mid+1, hi, switch_depth+1);
+ }
+ set_control(iftrue);
+ }
+
+ } else {
+ // mid is a range, not a singleton, so treat mid..hi as a unit
+ IfNode *iff_ge = jump_if_fork_int(key_val, test_val, BoolTest::ge);
+
+ // if there is a higher range, test for it and process it:
+ if (mid == hi) {
+ jump_if_true_fork(iff_ge, mid->dest(), mid->table_index());
+ } else {
+ Node *iftrue = _gvn.transform( new (C, 1) IfTrueNode(iff_ge) );
+ Node *iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff_ge) );
+ { PreserveJVMState pjvms(this);
+ set_control(iftrue);
+ jump_switch_ranges(key_val, mid, hi, switch_depth+1);
+ }
+ set_control(iffalse);
+ }
+ }
+
+ // in any case, process the lower range
+ jump_switch_ranges(key_val, lo, mid-1, switch_depth+1);
+ }
+
+ // Decrease pred_count for each successor after all is done.
+ if (switch_depth == 0) {
+ int unique_successors = switch_block->num_successors();
+ for (int i = 0; i < unique_successors; i++) {
+ Block* target = switch_block->successor_at(i);
+ // Throw away the pre-allocated path for each unique successor.
+ target->next_path_num();
+ }
+ }
+
+#ifndef PRODUCT
+ _max_switch_depth = MAX2(switch_depth, _max_switch_depth);
+ if (TraceOptoParse && Verbose && WizardMode && switch_depth == 0) {
+ SwitchRange* r;
+ int nsing = 0;
+ for( r = lo; r <= hi; r++ ) {
+ if( r->is_singleton() ) nsing++;
+ }
+ tty->print(">>> ");
+ _method->print_short_name();
+ tty->print_cr(" switch decision tree");
+ tty->print_cr(" %d ranges (%d singletons), max_depth=%d, est_depth=%d",
+ hi-lo+1, nsing, _max_switch_depth, _est_switch_depth);
+ if (_max_switch_depth > _est_switch_depth) {
+ tty->print_cr("******** BAD SWITCH DEPTH ********");
+ }
+ tty->print(" ");
+ for( r = lo; r <= hi; r++ ) {
+ r->print(env());
+ }
+ tty->print_cr("");
+ }
+#endif
+}
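// Editorial sketch (not part of the patch): the shape of the decision tree that
// jump_switch_ranges() emits, written as a plain recursive lookup over sorted,
// disjoint, contiguous ranges. The parser emits If nodes instead of recursing,
// but the pivot choice (a singleton near the middle when available) and the
// O(log n) depth are the same idea.
#include <cassert>
#include <climits>

struct Range { int lo, hi, dest; };   // inclusive, sorted, contiguous

static int lookup(const Range* lo, const Range* hi, int key) {
  if (lo == hi) return lo->dest;                      // one range left
  const Range* mid = lo + (hi - lo + 1) / 2;          // same pivot as nr/2
  if (mid->lo == mid->hi) {                           // singleton: test ==, then </>
    if (key == mid->lo) return mid->dest;
    return (key < mid->lo) ? lookup(lo, mid - 1, key) : lookup(mid + 1, hi, key);
  }
  return (key >= mid->lo) ? lookup(mid, hi, key) : lookup(lo, mid - 1, key);
}

int main() {
  Range r[] = { {INT_MIN, 0, 9}, {1, 1, 10}, {2, 2, 11}, {3, INT_MAX, 9} };
  assert(lookup(r, r + 3, 1)  == 10);
  assert(lookup(r, r + 3, 2)  == 11);
  assert(lookup(r, r + 3, 42) ==  9);
  return 0;
}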
+
+void Parse::modf() {
+ Node *f2 = pop();
+ Node *f1 = pop();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::modf_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::frem),
+ "frem", NULL, //no memory effects
+ f1, f2);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+ push(res);
+}
+
+void Parse::modd() {
+ Node *d2 = pop_pair();
+ Node *d1 = pop_pair();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::Math_DD_D_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::drem),
+ "drem", NULL, //no memory effects
+ d1, top(), d2, top());
+ Node* res_d = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+#ifdef ASSERT
+ Node* res_top = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 1));
+ assert(res_top == top(), "second value must be top");
+#endif
+
+ push_pair(res_d);
+}
+
+void Parse::l2f() {
+ Node* f2 = pop();
+ Node* f1 = pop();
+ Node* c = make_runtime_call(RC_LEAF, OptoRuntime::l2f_Type(),
+ CAST_FROM_FN_PTR(address, SharedRuntime::l2f),
+ "l2f", NULL, //no memory effects
+ f1, f2);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms + 0));
+
+ push(res);
+}
+
+void Parse::do_irem() {
+ // Must keep both values on the expression-stack during null-check
+ do_null_check(peek(), T_INT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+
+ Node* b = pop();
+ Node* a = pop();
+
+ const Type *t = _gvn.type(b);
+ if (t != Type::TOP) {
+ const TypeInt *ti = t->is_int();
+ if (ti->is_con()) {
+ int divisor = ti->get_con();
+ // check for positive power of 2
+ if (divisor > 0 &&
+ (divisor & ~(divisor-1)) == divisor) {
+ // yes !
+ Node *mask = _gvn.intcon((divisor - 1));
+ // Sigh, must handle negative dividends
+ Node *zero = _gvn.intcon(0);
+ IfNode *ifff = jump_if_fork_int(a, zero, BoolTest::lt);
+ Node *iff = _gvn.transform( new (C, 1) IfFalseNode(ifff) );
+ Node *ift = _gvn.transform( new (C, 1) IfTrueNode (ifff) );
+ Node *reg = jump_if_join(ift, iff);
+ Node *phi = PhiNode::make(reg, NULL, TypeInt::INT);
+ // Negative path; negate/and/negate
+ Node *neg = _gvn.transform( new (C, 3) SubINode(zero, a) );
+ Node *andn= _gvn.transform( new (C, 3) AndINode(neg, mask) );
+ Node *negn= _gvn.transform( new (C, 3) SubINode(zero, andn) );
+ phi->init_req(1, negn);
+ // Fast positive case
+ Node *andx = _gvn.transform( new (C, 3) AndINode(a, mask) );
+ phi->init_req(2, andx);
+ // Push the merge
+ push( _gvn.transform(phi) );
+ return;
+ }
+ }
+ }
+ // Default case
+ push( _gvn.transform( new (C, 3) ModINode(control(),a,b) ) );
+}
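// Editorial sketch (not part of the patch): the mask-based remainder that
// do_irem() builds when the divisor is a known positive power of two. Java's %
// keeps the sign of the dividend, hence the negate/and/negate on the negative
// path; the same (divisor & ~(divisor-1)) == divisor test detects powers of two.
// (The ideal-graph SubI wraps on INT_MIN; this C++ sketch ignores that edge.)
#include <cassert>

static bool is_pow2(int d)         { return d > 0 && (d & ~(d - 1)) == d; }
static int  rem_pow2(int a, int d) {                 // requires is_pow2(d)
  int mask = d - 1;
  return (a < 0) ? -((-a) & mask)                    // negative dividend
                 : (a & mask);                       // fast positive path
}

int main() {
  assert(is_pow2(8) && !is_pow2(12));
  assert(rem_pow2( 13, 8) ==  13 % 8);               //  5
  assert(rem_pow2(-13, 8) == -13 % 8);               // -5, not 3
  return 0;
}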
+
+// Handle jsr and jsr_w bytecode
+void Parse::do_jsr() {
+ assert(bc() == Bytecodes::_jsr || bc() == Bytecodes::_jsr_w, "wrong bytecode");
+
+ // Store information about current state, tagged with new _jsr_bci
+ int return_bci = iter().next_bci();
+ int jsr_bci = (bc() == Bytecodes::_jsr) ? iter().get_dest() : iter().get_far_dest();
+
+ // Update method data
+ profile_taken_branch(jsr_bci);
+
+ // The way we do things now, there is only one successor block
+ // for the jsr, because the target code is cloned by ciTypeFlow.
+ Block* target = successor_for_bci(jsr_bci);
+
+ // What got pushed?
+ const Type* ret_addr = target->peek();
+ assert(ret_addr->singleton(), "must be a constant (cloned jsr body)");
+
+ // Effect of jsr on stack
+ push(_gvn.makecon(ret_addr));
+
+ // Flow to the jsr.
+ merge(jsr_bci);
+}
+
+// Handle ret bytecode
+void Parse::do_ret() {
+ // Find to whom we return.
+#if 0 // %%%% MAKE THIS WORK
+ Node* con = local();
+ const TypePtr* tp = con->bottom_type()->isa_ptr();
+ assert(tp && tp->singleton(), "");
+ int return_bci = (int) tp->get_con();
+ merge(return_bci);
+#else
+ assert(block()->num_successors() == 1, "a ret can only go one place now");
+ Block* target = block()->successor_at(0);
+ assert(!target->is_ready(), "our arrival must be expected");
+ profile_ret(target->flow()->start());
+ int pnum = target->next_path_num();
+ merge_common(target, pnum);
+#endif
+}
+
+//--------------------------dynamic_branch_prediction--------------------------
+// Try to gather dynamic branch prediction behavior. Return a probability
+// of the branch being taken and set the "cnt" field. Return -1.0
+// if we need to use static prediction for some reason.
+float Parse::dynamic_branch_prediction(float &cnt) {
+ ResourceMark rm;
+
+ cnt = COUNT_UNKNOWN;
+
+ // Use MethodData information if it is available
+ // FIXME: free the ProfileData structure
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_mature()) return PROB_UNKNOWN;
+ ciProfileData* data = methodData->bci_to_data(bci());
+ if (!data->is_JumpData()) return PROB_UNKNOWN;
+
+ // get taken and not taken values
+ int taken = data->as_JumpData()->taken();
+ int not_taken = 0;
+ if (data->is_BranchData()) {
+ not_taken = data->as_BranchData()->not_taken();
+ }
+
+ // scale the counts to be commensurate with invocation counts:
+ taken = method()->scale_count(taken);
+ not_taken = method()->scale_count(not_taken);
+
+ // Give up if too few counts to be meaningful
+ if (taken + not_taken < 40) {
+ if (C->log() != NULL) {
+ C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d'", iter().get_dest(), taken, not_taken);
+ }
+ return PROB_UNKNOWN;
+ }
+
+ // Compute frequency that we arrive here
+ int sum = taken + not_taken;
+ // Adjust if this block is a cloned private block but the
+ // jump counts are shared. Take the private counts for
+ // just this path instead of the shared counts.
+ if( block()->count() > 0 )
+ sum = block()->count();
+ cnt = (float)sum / (float)FreqCountInvocations;
+
+ // Pin probability to sane limits
+ float prob;
+ if( !taken )
+ prob = (0+PROB_MIN) / 2;
+ else if( !not_taken )
+ prob = (1+PROB_MAX) / 2;
+ else { // Compute probability of true path
+ prob = (float)taken / (float)(taken + not_taken);
+ if (prob > PROB_MAX) prob = PROB_MAX;
+ if (prob < PROB_MIN) prob = PROB_MIN;
+ }
+
+ assert((cnt > 0.0f) && (prob > 0.0f),
+ "Bad frequency assignment in if");
+
+ if (C->log() != NULL) {
+ const char* prob_str = NULL;
+ if (prob >= PROB_MAX) prob_str = (prob == PROB_MAX) ? "max" : "always";
+ if (prob <= PROB_MIN) prob_str = (prob == PROB_MIN) ? "min" : "never";
+ char prob_str_buf[30];
+ if (prob_str == NULL) {
+ sprintf(prob_str_buf, "%g", prob);
+ prob_str = prob_str_buf;
+ }
+ C->log()->elem("branch target_bci='%d' taken='%d' not_taken='%d' cnt='%g' prob='%s'",
+ iter().get_dest(), taken, not_taken, cnt, prob_str);
+ }
+ return prob;
+}
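// Editorial sketch (not part of the patch): how the profiled taken/not-taken
// counters become a pinned branch probability. The limit values below are
// placeholders for PROB_MIN / PROB_MAX, not the real constants. Note that a
// zero taken count maps to (0+PROB_MIN)/2, deliberately below PROB_MIN, so
// that seems_never_taken() can recognize it later.
#include <cstdio>

static const float kProbMin = 1e-6f;          // placeholder for PROB_MIN
static const float kProbMax = 1.0f - 1e-6f;   // placeholder for PROB_MAX

static float branch_prob(int taken, int not_taken) {
  if (taken + not_taken < 40) return -1.0f;             // too few counts (PROB_UNKNOWN)
  if (taken == 0)     return (0.0f + kProbMin) / 2.0f;  // "never taken"
  if (not_taken == 0) return (1.0f + kProbMax) / 2.0f;  // "always taken"
  float prob = (float)taken / (float)(taken + not_taken);
  if (prob > kProbMax) prob = kProbMax;                 // pin to sane limits
  if (prob < kProbMin) prob = kProbMin;
  return prob;
}

int main() {
  printf("%g %g %g\n", branch_prob(1000, 10), branch_prob(0, 500), branch_prob(5, 5));
  return 0;
}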
+
+//-----------------------------branch_prediction-------------------------------
+float Parse::branch_prediction(float& cnt,
+ BoolTest::mask btest,
+ int target_bci) {
+ float prob = dynamic_branch_prediction(cnt);
+ // If prob is unknown, switch to static prediction
+ if (prob != PROB_UNKNOWN) return prob;
+
+ prob = PROB_FAIR; // Set default value
+ if (btest == BoolTest::eq) // Exactly equal test?
+ prob = PROB_STATIC_INFREQUENT; // Assume it's relatively infrequent
+ else if (btest == BoolTest::ne)
+ prob = PROB_STATIC_FREQUENT; // Assume it's relatively frequent
+
+ // If this is a conditional test guarding a backwards branch,
+ // assume it's a loop-back edge. Make it a likely taken branch.
+ if (target_bci < bci()) {
+ if (is_osr_parse()) { // Could be a hot OSR'd loop; force deopt
+ // Since it's an OSR, we probably have profile data, but since
+ // branch_prediction returned PROB_UNKNOWN, the counts are too small.
+ // Let's make a special check here for completely zero counts.
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_empty()) {
+ ciProfileData* data = methodData->bci_to_data(bci());
+ // Only stop for truly zero counts, which mean an unknown part
+ // of the OSR-ed method, and we want to deopt to gather more stats.
+ // If you have ANY counts, then this loop is simply 'cold' relative
+ // to the OSR loop.
+ if (data->as_BranchData()->taken() +
+ data->as_BranchData()->not_taken() == 0 ) {
+ // This is the only way to return PROB_UNKNOWN:
+ return PROB_UNKNOWN;
+ }
+ }
+ }
+ prob = PROB_STATIC_FREQUENT; // Likely to take backwards branch
+ }
+
+ assert(prob != PROB_UNKNOWN, "must have some guess at this point");
+ return prob;
+}
+
+// The magic constants are chosen so as to match the output of
+// branch_prediction() when the profile reports a zero taken count.
+// It is important to distinguish zero counts unambiguously, because
+// some branches (e.g., _213_javac.Assembler.eliminate) validly produce
+// very small but nonzero probabilities, which if confused with zero
+// counts would keep the program recompiling indefinitely.
+bool Parse::seems_never_taken(float prob) {
+ return prob < PROB_MIN;
+}
+
+inline void Parse::repush_if_args() {
+#ifndef PRODUCT
+ if (PrintOpto && WizardMode) {
+ tty->print("defending against excessive implicit null exceptions on %s @%d in ",
+ Bytecodes::name(iter().cur_bc()), iter().cur_bci());
+ method()->print_name(); tty->cr();
+ }
+#endif
+ int bc_depth = - Bytecodes::depth(iter().cur_bc());
+ assert(bc_depth == 1 || bc_depth == 2, "only two kinds of branches");
+ DEBUG_ONLY(sync_jvms()); // argument(n) requires a synced jvms
+ assert(argument(0) != NULL, "must exist");
+ assert(bc_depth == 1 || argument(1) != NULL, "two must exist");
+ _sp += bc_depth;
+}
+
+//----------------------------------do_ifnull----------------------------------
+void Parse::do_ifnull(BoolTest::mask btest) {
+ int target_bci = iter().get_dest();
+
+ float cnt;
+ float prob = branch_prediction(cnt, btest, target_bci);
+ if (prob == PROB_UNKNOWN) {
+ // (An earlier version of do_ifnull omitted this trap for OSR methods.)
+#ifndef PRODUCT
+ if (PrintOpto && Verbose)
+ tty->print_cr("Never-taken backedge stops compilation at bci %d",bci());
+#endif
+ repush_if_args(); // to gather stats on loop
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ profile_taken_branch(target_bci, !ProfileInterpreter);
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL, "cold");
+ return;
+ }
+
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(target_bci);
+ Block* branch_block = successor_for_bci(target_bci);
+ Block* next_block = successor_for_bci(iter().next_bci());
+
+ explicit_null_checks_inserted++;
+ Node* a = null();
+ Node* b = pop();
+ Node* c = _gvn.transform( new (C, 3) CmpPNode(b, a) );
+
+ // Make a cast-away-nullness that is control dependent on the test
+ const Type *t = _gvn.type(b);
+ const Type *t_not_null = t->join(TypePtr::NOTNULL);
+ Node *cast = new (C, 2) CastPPNode(b,t_not_null);
+
+ // Generate real control flow
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( c, btest ) );
+
+ // Sanity check the probability value
+ assert(prob > 0.0f,"Bad probability in Parser");
+ // Need xform to put node in hash table
+ IfNode *iff = create_and_xform_if( control(), tst, prob, cnt );
+ assert(iff->_prob > 0.0f,"Optimizer made bad probability in parser");
+ // True branch
+ { PreserveJVMState pjvms(this);
+ Node* iftrue = _gvn.transform( new (C, 1) IfTrueNode (iff) );
+ set_control(iftrue);
+
+ if (stopped()) { // Path is dead?
+ explicit_null_checks_elided++;
+ } else { // Path is live.
+ // Update method data
+ profile_taken_branch(target_bci);
+ adjust_map_after_if(btest, c, prob, branch_block, next_block);
+ if (!stopped())
+ merge(target_bci);
+ }
+ }
+
+ // False branch
+ Node* iffalse = _gvn.transform( new (C, 1) IfFalseNode(iff) );
+ set_control(iffalse);
+
+ if (stopped()) { // Path is dead?
+ explicit_null_checks_elided++;
+ } else { // Path is live.
+ // Update method data
+ profile_not_taken_branch();
+ adjust_map_after_if(BoolTest(btest).negate(), c, 1.0-prob,
+ next_block, branch_block);
+ }
+}
+
+//------------------------------------do_if------------------------------------
+void Parse::do_if(BoolTest::mask btest, Node* c) {
+ int target_bci = iter().get_dest();
+
+ float cnt;
+ float prob = branch_prediction(cnt, btest, target_bci);
+ float untaken_prob = 1.0 - prob;
+
+ if (prob == PROB_UNKNOWN) {
+#ifndef PRODUCT
+ if (PrintOpto && Verbose)
+ tty->print_cr("Never-taken backedge stops compilation at bci %d",bci());
+#endif
+ repush_if_args(); // to gather stats on loop
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ profile_taken_branch(target_bci, !ProfileInterpreter);
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL, "cold");
+ return;
+ }
+
+ // Sanity check the probability value
+ assert(0.0f < prob && prob < 1.0f,"Bad probability in Parser");
+
+ bool taken_if_true = true;
+ // Convert BoolTest to canonical form:
+ if (!BoolTest(btest).is_canonical()) {
+ btest = BoolTest(btest).negate();
+ taken_if_true = false;
+ // prob is NOT updated here; it remains the probability of the taken
+ // path (as opposed to the prob of the path guarded by an 'IfTrueNode').
+ }
+ assert(btest != BoolTest::eq, "!= is the only canonical exact test");
+
+ Node* tst0 = new (C, 2) BoolNode(c, btest);
+ Node* tst = _gvn.transform(tst0);
+ BoolTest::mask taken_btest = BoolTest::illegal;
+ BoolTest::mask untaken_btest = BoolTest::illegal;
+ if (btest == BoolTest::ne) {
+ // For now, these are the only cases of btest that matter. (More later.)
+ taken_btest = taken_if_true ? btest : BoolTest::eq;
+ untaken_btest = taken_if_true ? BoolTest::eq : btest;
+ }
+
+ // Generate real control flow
+ float true_prob = (taken_if_true ? prob : untaken_prob);
+ IfNode* iff = create_and_map_if(control(), tst, true_prob, cnt);
+ assert(iff->_prob > 0.0f,"Optimizer made bad probability in parser");
+ Node* taken_branch = new (C, 1) IfTrueNode(iff);
+ Node* untaken_branch = new (C, 1) IfFalseNode(iff);
+ if (!taken_if_true) { // Finish conversion to canonical form
+ Node* tmp = taken_branch;
+ taken_branch = untaken_branch;
+ untaken_branch = tmp;
+ }
+
+ Block* branch_block = successor_for_bci(target_bci);
+ Block* next_block = successor_for_bci(iter().next_bci());
+
+ // Branch is taken:
+ { PreserveJVMState pjvms(this);
+ taken_branch = _gvn.transform(taken_branch);
+ set_control(taken_branch);
+
+ if (!stopped()) {
+ // Update method data
+ profile_taken_branch(target_bci);
+ adjust_map_after_if(taken_btest, c, prob, branch_block, next_block);
+ if (!stopped())
+ merge(target_bci);
+ }
+ }
+
+ untaken_branch = _gvn.transform(untaken_branch);
+ set_control(untaken_branch);
+
+ // Branch not taken.
+ if (!stopped()) {
+ // Update method data
+ profile_not_taken_branch();
+ adjust_map_after_if(untaken_btest, c, untaken_prob,
+ next_block, branch_block);
+ }
+}
+
+//----------------------------adjust_map_after_if------------------------------
+// Adjust the JVM state to reflect the result of taking this path.
+// Basically, it means inspecting the CmpNode controlling this
+// branch, seeing how it constrains a tested value, and then
+// deciding if it's worth our while to encode this constraint
+// as graph nodes in the current abstract interpretation map.
+void Parse::adjust_map_after_if(BoolTest::mask btest, Node* c, float prob,
+ Block* path, Block* other_path) {
+ if (stopped() || !c->is_Cmp() || btest == BoolTest::illegal)
+ return; // nothing to do
+
+ bool is_fallthrough = (path == successor_for_bci(iter().next_bci()));
+
+ int cop = c->Opcode();
+ if (seems_never_taken(prob) && cop == Op_CmpP && btest == BoolTest::eq) {
+ // (An earlier version of do_if omitted '&& btest == BoolTest::eq'.)
+ //
+ // If this might possibly turn into an implicit null check,
+ // and the null has never yet been seen, we need to generate
+ // an uncommon trap, so as to recompile instead of suffering
+ // with very slow branches. (We'll get the slow branches if
+ // the program ever changes phase and starts seeing nulls here.)
+ //
+ // The tests we worry about are of the form (p == null).
+ // We do not simply inspect for a null constant, since a node may
+ // optimize to 'null' later on.
+ repush_if_args();
+ // We need to mark this branch as taken so that if we recompile we will
+ // see that it is possible. In the tiered system the interpreter doesn't
+ // do profiling and by the time we get to the lower tier from the interpreter
+ // the path may be cold again. Make sure it doesn't look untaken
+ if (is_fallthrough) {
+ profile_not_taken_branch(!ProfileInterpreter);
+ } else {
+ profile_taken_branch(iter().get_dest(), !ProfileInterpreter);
+ }
+ uncommon_trap(Deoptimization::Reason_unreached,
+ Deoptimization::Action_reinterpret,
+ NULL,
+ (is_fallthrough ? "taken always" : "taken never"));
+ return;
+ }
+
+ Node* val = c->in(1);
+ Node* con = c->in(2);
+ const Type* tcon = _gvn.type(con);
+ const Type* tval = _gvn.type(val);
+ bool have_con = tcon->singleton();
+ if (tval->singleton()) {
+ if (!have_con) {
+ // Swap, so constant is in con.
+ con = val;
+ tcon = tval;
+ val = c->in(2);
+ tval = _gvn.type(val);
+ btest = BoolTest(btest).commute();
+ have_con = true;
+ } else {
+ // Do we have two constants? Then leave well enough alone.
+ have_con = false;
+ }
+ }
+ if (!have_con) // remaining adjustments need a con
+ return;
+
+
+ int val_in_map = map()->find_edge(val);
+ if (val_in_map < 0) return; // replace_in_map would be useless
+ {
+ JVMState* jvms = this->jvms();
+ if (!(jvms->is_loc(val_in_map) ||
+ jvms->is_stk(val_in_map)))
+ return; // again, it would be useless
+ }
+
+ // Check for a comparison to a constant, and "know" that the compared
+ // value is constrained on this path.
+ assert(tcon->singleton(), "");
+ ConstraintCastNode* ccast = NULL;
+ Node* cast = NULL;
+
+ switch (btest) {
+ case BoolTest::eq: // Constant test?
+ {
+ const Type* tboth = tcon->join(tval);
+ if (tboth == tval) break; // Nothing to gain.
+ if (tcon->isa_int()) {
+ ccast = new (C, 2) CastIINode(val, tboth);
+ } else if (tcon == TypePtr::NULL_PTR) {
+ // Cast to null, but keep the pointer identity temporarily live.
+ ccast = new (C, 2) CastPPNode(val, tboth);
+ } else {
+ const TypeF* tf = tcon->isa_float_constant();
+ const TypeD* td = tcon->isa_double_constant();
+ // Exclude tests vs float/double 0 as these could be
+ // either +0 or -0. Just because you are equal to +0
+ // doesn't mean you ARE +0!
+ if ((!tf || tf->_f != 0.0) &&
+ (!td || td->_d != 0.0))
+ cast = con; // Replace non-constant val by con.
+ }
+ }
+ break;
+
+ case BoolTest::ne:
+ if (tcon == TypePtr::NULL_PTR) {
+ cast = cast_not_null(val, false);
+ }
+ break;
+
+ default:
+ // (At this point we could record int range types with CastII.)
+ break;
+ }
+
+ if (ccast != NULL) {
+ const Type* tcc = ccast->as_Type()->type();
+ assert(tcc != tval && tcc->higher_equal(tval), "must improve");
+ // Delay transform() call to allow recovery of pre-cast value
+ // at the control merge.
+ ccast->set_req(0, control());
+ _gvn.set_type_bottom(ccast);
+ record_for_igvn(ccast);
+ cast = ccast;
+ }
+
+ if (cast != NULL) { // Here's the payoff.
+ replace_in_map(val, cast);
+ }
+}
+
+
+//------------------------------do_one_bytecode--------------------------------
+// Parse this bytecode, and alter the Parser's JVM->Node mapping
+void Parse::do_one_bytecode() {
+ Node *a, *b, *c, *d; // Handy temps
+ BoolTest::mask btest;
+ int i;
+
+ assert(!has_exceptions(), "bytecode entry state must be clear of throws");
+
+ if (C->check_node_count(NodeLimitFudgeFactor * 5,
+ "out of nodes parsing method")) {
+ return;
+ }
+
+#ifdef ASSERT
+ // for setting breakpoints
+ if (TraceOptoParse) {
+ tty->print(" @");
+ dump_bci(bci());
+ }
+#endif
+
+ switch (bc()) {
+ case Bytecodes::_nop:
+ // do nothing
+ break;
+ case Bytecodes::_lconst_0:
+ push_pair(longcon(0));
+ break;
+
+ case Bytecodes::_lconst_1:
+ push_pair(longcon(1));
+ break;
+
+ case Bytecodes::_fconst_0:
+ push(zerocon(T_FLOAT));
+ break;
+
+ case Bytecodes::_fconst_1:
+ push(makecon(TypeF::ONE));
+ break;
+
+ case Bytecodes::_fconst_2:
+ push(makecon(TypeF::make(2.0f)));
+ break;
+
+ case Bytecodes::_dconst_0:
+ push_pair(zerocon(T_DOUBLE));
+ break;
+
+ case Bytecodes::_dconst_1:
+ push_pair(makecon(TypeD::ONE));
+ break;
+
+ case Bytecodes::_iconst_m1:push(intcon(-1)); break;
+ case Bytecodes::_iconst_0: push(intcon( 0)); break;
+ case Bytecodes::_iconst_1: push(intcon( 1)); break;
+ case Bytecodes::_iconst_2: push(intcon( 2)); break;
+ case Bytecodes::_iconst_3: push(intcon( 3)); break;
+ case Bytecodes::_iconst_4: push(intcon( 4)); break;
+ case Bytecodes::_iconst_5: push(intcon( 5)); break;
+ case Bytecodes::_bipush: push(intcon( iter().get_byte())); break;
+ case Bytecodes::_sipush: push(intcon( iter().get_short())); break;
+ case Bytecodes::_aconst_null: push(null()); break;
+ case Bytecodes::_ldc:
+ case Bytecodes::_ldc_w:
+ case Bytecodes::_ldc2_w:
+ // If the constant is unresolved, run this BC once in the interpreter.
+ if (iter().is_unresolved_string()) {
+ uncommon_trap(Deoptimization::make_trap_request
+ (Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ iter().get_constant_index()),
+ NULL, "unresolved_string");
+ break;
+ } else {
+ ciConstant constant = iter().get_constant();
+ if (constant.basic_type() == T_OBJECT) {
+ ciObject* c = constant.as_object();
+ if (c->is_klass()) {
+ // The constant returned for a klass is the ciKlass for the
+ // entry. We want the java_mirror so get it.
+ ciKlass* klass = c->as_klass();
+ if (klass->is_loaded()) {
+ constant = ciConstant(T_OBJECT, klass->java_mirror());
+ } else {
+ uncommon_trap(Deoptimization::make_trap_request
+ (Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ iter().get_constant_index()),
+ NULL, "unresolved_klass");
+ break;
+ }
+ }
+ }
+ push_constant(constant);
+ }
+
+ break;
+
+ case Bytecodes::_aload_0:
+ push( local(0) );
+ break;
+ case Bytecodes::_aload_1:
+ push( local(1) );
+ break;
+ case Bytecodes::_aload_2:
+ push( local(2) );
+ break;
+ case Bytecodes::_aload_3:
+ push( local(3) );
+ break;
+ case Bytecodes::_aload:
+ push( local(iter().get_index()) );
+ break;
+
+ case Bytecodes::_fload_0:
+ case Bytecodes::_iload_0:
+ push( local(0) );
+ break;
+ case Bytecodes::_fload_1:
+ case Bytecodes::_iload_1:
+ push( local(1) );
+ break;
+ case Bytecodes::_fload_2:
+ case Bytecodes::_iload_2:
+ push( local(2) );
+ break;
+ case Bytecodes::_fload_3:
+ case Bytecodes::_iload_3:
+ push( local(3) );
+ break;
+ case Bytecodes::_fload:
+ case Bytecodes::_iload:
+ push( local(iter().get_index()) );
+ break;
+ case Bytecodes::_lload_0:
+ push_pair_local( 0 );
+ break;
+ case Bytecodes::_lload_1:
+ push_pair_local( 1 );
+ break;
+ case Bytecodes::_lload_2:
+ push_pair_local( 2 );
+ break;
+ case Bytecodes::_lload_3:
+ push_pair_local( 3 );
+ break;
+ case Bytecodes::_lload:
+ push_pair_local( iter().get_index() );
+ break;
+
+ case Bytecodes::_dload_0:
+ push_pair_local(0);
+ break;
+ case Bytecodes::_dload_1:
+ push_pair_local(1);
+ break;
+ case Bytecodes::_dload_2:
+ push_pair_local(2);
+ break;
+ case Bytecodes::_dload_3:
+ push_pair_local(3);
+ break;
+ case Bytecodes::_dload:
+ push_pair_local(iter().get_index());
+ break;
+ case Bytecodes::_fstore_0:
+ case Bytecodes::_istore_0:
+ case Bytecodes::_astore_0:
+ set_local( 0, pop() );
+ break;
+ case Bytecodes::_fstore_1:
+ case Bytecodes::_istore_1:
+ case Bytecodes::_astore_1:
+ set_local( 1, pop() );
+ break;
+ case Bytecodes::_fstore_2:
+ case Bytecodes::_istore_2:
+ case Bytecodes::_astore_2:
+ set_local( 2, pop() );
+ break;
+ case Bytecodes::_fstore_3:
+ case Bytecodes::_istore_3:
+ case Bytecodes::_astore_3:
+ set_local( 3, pop() );
+ break;
+ case Bytecodes::_fstore:
+ case Bytecodes::_istore:
+ case Bytecodes::_astore:
+ set_local( iter().get_index(), pop() );
+ break;
+ // long stores
+ case Bytecodes::_lstore_0:
+ set_pair_local( 0, pop_pair() );
+ break;
+ case Bytecodes::_lstore_1:
+ set_pair_local( 1, pop_pair() );
+ break;
+ case Bytecodes::_lstore_2:
+ set_pair_local( 2, pop_pair() );
+ break;
+ case Bytecodes::_lstore_3:
+ set_pair_local( 3, pop_pair() );
+ break;
+ case Bytecodes::_lstore:
+ set_pair_local( iter().get_index(), pop_pair() );
+ break;
+
+ // double stores
+ case Bytecodes::_dstore_0:
+ set_pair_local( 0, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_1:
+ set_pair_local( 1, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_2:
+ set_pair_local( 2, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore_3:
+ set_pair_local( 3, dstore_rounding(pop_pair()) );
+ break;
+ case Bytecodes::_dstore:
+ set_pair_local( iter().get_index(), dstore_rounding(pop_pair()) );
+ break;
+
+ case Bytecodes::_pop: _sp -= 1; break;
+ case Bytecodes::_pop2: _sp -= 2; break;
+ case Bytecodes::_swap:
+ a = pop();
+ b = pop();
+ push(a);
+ push(b);
+ break;
+ case Bytecodes::_dup:
+ a = pop();
+ push(a);
+ push(a);
+ break;
+ case Bytecodes::_dup_x1:
+ a = pop();
+ b = pop();
+ push( a );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup_x2:
+ a = pop();
+ b = pop();
+ c = pop();
+ push( a );
+ push( c );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup2:
+ a = pop();
+ b = pop();
+ push( b );
+ push( a );
+ push( b );
+ push( a );
+ break;
+
+ case Bytecodes::_dup2_x1:
+ // before: .. c, b, a
+ // after: .. b, a, c, b, a
+ // not tested
+ a = pop();
+ b = pop();
+ c = pop();
+ push( b );
+ push( a );
+ push( c );
+ push( b );
+ push( a );
+ break;
+ case Bytecodes::_dup2_x2:
+ // before: .. d, c, b, a
+ // after: .. b, a, d, c, b, a
+ // not tested
+ a = pop();
+ b = pop();
+ c = pop();
+ d = pop();
+ push( b );
+ push( a );
+ push( d );
+ push( c );
+ push( b );
+ push( a );
+ break;
+
+ case Bytecodes::_arraylength: {
+ // Must do null-check with value on expression stack
+ Node *ary = do_null_check(peek(), T_ARRAY);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ a = pop();
+ push(load_array_length(a));
+ break;
+ }
+
+ case Bytecodes::_baload: array_load(T_BYTE); break;
+ case Bytecodes::_caload: array_load(T_CHAR); break;
+ case Bytecodes::_iaload: array_load(T_INT); break;
+ case Bytecodes::_saload: array_load(T_SHORT); break;
+ case Bytecodes::_faload: array_load(T_FLOAT); break;
+ case Bytecodes::_aaload: array_load(T_OBJECT); break;
+ case Bytecodes::_laload: {
+ a = array_addressing(T_LONG, 0);
+ if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ push_pair( make_load(control(), a, TypeLong::LONG, T_LONG, TypeAryPtr::LONGS));
+ break;
+ }
+ case Bytecodes::_daload: {
+ a = array_addressing(T_DOUBLE, 0);
+ if (stopped()) return; // guaranteed null or range check
+ _sp -= 2; // Pop array and index
+ push_pair( make_load(control(), a, Type::DOUBLE, T_DOUBLE, TypeAryPtr::DOUBLES));
+ break;
+ }
+ case Bytecodes::_bastore: array_store(T_BYTE); break;
+ case Bytecodes::_castore: array_store(T_CHAR); break;
+ case Bytecodes::_iastore: array_store(T_INT); break;
+ case Bytecodes::_sastore: array_store(T_SHORT); break;
+ case Bytecodes::_fastore: array_store(T_FLOAT); break;
+ case Bytecodes::_aastore: {
+ d = array_addressing(T_OBJECT, 1);
+ if (stopped()) return; // guaranteed null or range check
+ array_store_check();
+ c = pop(); // Oop to store
+ b = pop(); // index (already used)
+ a = pop(); // the array itself
+ const Type* elemtype = _gvn.type(a)->is_aryptr()->elem();
+ const TypeAryPtr* adr_type = TypeAryPtr::OOPS;
+ Node* store = store_oop_to_array(control(), a, d, adr_type, c, elemtype, T_OBJECT);
+ break;
+ }
+ case Bytecodes::_lastore: {
+ a = array_addressing(T_LONG, 2);
+ if (stopped()) return; // guaranteed null or range check
+ c = pop_pair();
+ _sp -= 2; // Pop array and index
+ store_to_memory(control(), a, c, T_LONG, TypeAryPtr::LONGS);
+ break;
+ }
+ case Bytecodes::_dastore: {
+ a = array_addressing(T_DOUBLE, 2);
+ if (stopped()) return; // guaranteed null or range check
+ c = pop_pair();
+ _sp -= 2; // Pop array and index
+ c = dstore_rounding(c);
+ store_to_memory(control(), a, c, T_DOUBLE, TypeAryPtr::DOUBLES);
+ break;
+ }
+ case Bytecodes::_getfield:
+ do_getfield();
+ break;
+
+ case Bytecodes::_getstatic:
+ do_getstatic();
+ break;
+
+ case Bytecodes::_putfield:
+ do_putfield();
+ break;
+
+ case Bytecodes::_putstatic:
+ do_putstatic();
+ break;
+
+ case Bytecodes::_irem:
+ do_irem();
+ break;
+ case Bytecodes::_idiv:
+ // Must keep both values on the expression-stack during null-check
+ do_null_check(peek(), T_INT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop();
+ a = pop();
+ push( _gvn.transform( new (C, 3) DivINode(control(),a,b) ) );
+ break;
+ case Bytecodes::_imul:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) MulINode(a,b) ) );
+ break;
+ case Bytecodes::_iadd:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) AddINode(a,b) ) );
+ break;
+ case Bytecodes::_ineg:
+ a = pop();
+ push( _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),a)) );
+ break;
+ case Bytecodes::_isub:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) SubINode(a,b) ) );
+ break;
+ case Bytecodes::_iand:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) AndINode(a,b) ) );
+ break;
+ case Bytecodes::_ior:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) OrINode(a,b) ) );
+ break;
+ case Bytecodes::_ixor:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) XorINode(a,b) ) );
+ break;
+ case Bytecodes::_ishl:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) LShiftINode(a,b) ) );
+ break;
+ case Bytecodes::_ishr:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) RShiftINode(a,b) ) );
+ break;
+ case Bytecodes::_iushr:
+ b = pop(); a = pop();
+ push( _gvn.transform( new (C, 3) URShiftINode(a,b) ) );
+ break;
+
+ case Bytecodes::_fneg:
+ a = pop();
+ b = _gvn.transform(new (C, 2) NegFNode (a));
+ push(b);
+ break;
+
+ case Bytecodes::_fsub:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) SubFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fadd:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) AddFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fmul:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) MulFNode(a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_fdiv:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) DivFNode(0,a,b) );
+ d = precision_rounding(c);
+ push( d );
+ break;
+
+ case Bytecodes::_frem:
+ if (Matcher::has_match_rule(Op_ModF)) {
+ // Generate a ModF node.
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) ModFNode(0,a,b) );
+ d = precision_rounding(c);
+ push( d );
+ }
+ else {
+ // Generate a call.
+ modf();
+ }
+ break;
+
+ case Bytecodes::_fcmpl:
+ b = pop();
+ a = pop();
+ c = _gvn.transform( new (C, 3) CmpF3Node( a, b));
+ push(c);
+ break;
+ case Bytecodes::_fcmpg:
+ b = pop();
+ a = pop();
+
+ // Same as fcmpl but need to flip the unordered case. Swap the inputs,
+ // which negates the result sign except for unordered. Flip the unordered
+ // as well by using CmpF3 which implements unordered-lesser instead of
+ // unordered-greater semantics. Finally, negate the result bits. Result
+ // is the same as using a CmpF3Greater, except we did it with CmpF3 alone.
+ c = _gvn.transform( new (C, 3) CmpF3Node( b, a));
+ c = _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),c) );
+ push(c);
+ break;
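// Editorial sketch (not part of the patch): the Java fcmpl/fcmpg results that
// the two cases above reproduce. fcmpl maps an unordered (NaN) comparison to
// -1 and fcmpg maps it to +1; swapping the operands of the fcmpl flavor
// (CmpF3) and negating the result gives the fcmpg behavior, as the comment
// above describes.
#include <cassert>
#include <cmath>

static int fcmpl(float a, float b) {
  if (a < b)  return -1;
  if (a > b)  return  1;
  if (a == b) return  0;
  return -1;                       // unordered: at least one NaN
}

static int fcmpg(float a, float b) {
  return -fcmpl(b, a);             // swap inputs, then negate the result
}

int main() {
  assert(fcmpl(1.0f, 2.0f) == -1 && fcmpg(1.0f, 2.0f) == -1);
  assert(fcmpl(2.0f, 2.0f) ==  0 && fcmpg(2.0f, 2.0f) ==  0);
  assert(fcmpl(NAN,  2.0f) == -1 && fcmpg(NAN,  2.0f) ==  1);
  return 0;
}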
+
+ case Bytecodes::_f2i:
+ a = pop();
+ push(_gvn.transform(new (C, 2) ConvF2INode(a)));
+ break;
+
+ case Bytecodes::_d2i:
+ a = pop_pair();
+ b = _gvn.transform(new (C, 2) ConvD2INode(a));
+ push( b );
+ break;
+
+ case Bytecodes::_f2d:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvF2DNode(a));
+ push_pair( b );
+ break;
+
+ case Bytecodes::_d2f:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvD2FNode(a));
+ // This breaks _227_mtrt (speed & correctness) and _222_mpegaudio (speed)
+ //b = _gvn.transform(new (C, 2) RoundFloatNode(0, b) );
+ push( b );
+ break;
+
+ case Bytecodes::_l2f:
+ if (Matcher::convL2FSupported()) {
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvL2FNode(a));
+ // For i486.ad, FILD doesn't restrict precision to 24 or 53 bits.
+ // Rather than storing the result into an FP register then pushing
+ // out to memory to round, the machine instruction that implements
+ // ConvL2F is responsible for rounding.
+ // c = precision_rounding(b);
+ c = _gvn.transform(b);
+ push(c);
+ } else {
+ l2f();
+ }
+ break;
+
+ case Bytecodes::_l2d:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvL2DNode(a));
+ // For i486.ad, rounding is always necessary (see _l2f above).
+ // c = dprecision_rounding(b);
+ c = _gvn.transform(b);
+ push_pair(c);
+ break;
+
+ case Bytecodes::_f2l:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvF2LNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_d2l:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 2) ConvD2LNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_dsub:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) SubDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dadd:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AddDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dmul:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) MulDNode(a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_ddiv:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) DivDNode(0,a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ break;
+
+ case Bytecodes::_dneg:
+ a = pop_pair();
+ b = _gvn.transform(new (C, 2) NegDNode (a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_drem:
+ if (Matcher::has_match_rule(Op_ModD)) {
+ // Generate a ModD node.
+ b = pop_pair();
+ a = pop_pair();
+ // a % b
+
+ c = _gvn.transform( new (C, 3) ModDNode(0,a,b) );
+ d = dprecision_rounding(c);
+ push_pair( d );
+ }
+ else {
+ // Generate a call.
+ modd();
+ }
+ break;
+
+ case Bytecodes::_dcmpl:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) CmpD3Node( a, b));
+ push(c);
+ break;
+
+ case Bytecodes::_dcmpg:
+ b = pop_pair();
+ a = pop_pair();
+ // Same as dcmpl but need to flip the unordered case.
+ // Commute the inputs, which negates the result sign except for unordered.
+ // Flip the unordered as well by using CmpD3 which implements
+ // unordered-lesser instead of unordered-greater semantics.
+ // Finally, negate the result bits. Result is same as using a
+ // CmpD3Greater except we did it with CmpD3 alone.
+ c = _gvn.transform( new (C, 3) CmpD3Node( b, a));
+ c = _gvn.transform( new (C, 3) SubINode(_gvn.intcon(0),c) );
+ push(c);
+ break;
+
+
+ // Note for longs -> lo word is on TOS, hi word is on TOS - 1
+ case Bytecodes::_land:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AndLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lor:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) OrLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lxor:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) XorLNode(a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_lshl:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) LShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lshr:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) RShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lushr:
+ b = pop(); // the shift count
+ a = pop_pair(); // value to be shifted
+ c = _gvn.transform( new (C, 3) URShiftLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lmul:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) MulLNode(a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_lrem:
+ // Must keep both values on the expression-stack during null-check
+ assert(peek(0) == top(), "long word order");
+ do_null_check(peek(1), T_LONG);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) ModLNode(control(),a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_ldiv:
+ // Must keep both values on the expression-stack during null-check
+ assert(peek(0) == top(), "long word order");
+ do_null_check(peek(1), T_LONG);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) DivLNode(control(),a,b) );
+ push_pair(c);
+ break;
+
+ case Bytecodes::_ladd:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) AddLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lsub:
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) SubLNode(a,b) );
+ push_pair(c);
+ break;
+ case Bytecodes::_lcmp:
+ // Safepoints are now inserted _before_ branches. The long-compare
+ // bytecode painfully produces a 3-way value (-1,0,+1) which requires a
+ // slew of control flow. These are usually followed by a CmpI vs zero and
+ // a branch; this pattern then optimizes to the obvious long-compare and
+ // branch. However, if the branch is backwards there's a Safepoint
+ // inserted. The inserted Safepoint captures the JVM state at the
+ // pre-branch point, i.e. it captures the 3-way value. Thus if a
+ // long-compare is used to control a loop the debug info will force
+ // computation of the 3-way value, even though the generated code uses a
+ // long-compare and branch. We try to rectify the situation by inserting
+ // a SafePoint here and have it dominate and kill the safepoint added at a
+ // following backwards branch. At this point the JVM state merely holds 2
+ // longs but not the 3-way value.
+ if( UseLoopSafepoints ) {
+ switch( iter().next_bc() ) {
+ case Bytecodes::_ifgt:
+ case Bytecodes::_iflt:
+ case Bytecodes::_ifge:
+ case Bytecodes::_ifle:
+ case Bytecodes::_ifne:
+ case Bytecodes::_ifeq:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().next_get_dest());
+ }
+ }
+ b = pop_pair();
+ a = pop_pair();
+ c = _gvn.transform( new (C, 3) CmpL3Node( a, b ));
+ push(c);
+ break;
+
+ case Bytecodes::_lneg:
+ a = pop_pair();
+ b = _gvn.transform( new (C, 3) SubLNode(longcon(0),a));
+ push_pair(b);
+ break;
+ case Bytecodes::_l2i:
+ a = pop_pair();
+ push( _gvn.transform( new (C, 2) ConvL2INode(a)));
+ break;
+ case Bytecodes::_i2l:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2LNode(a));
+ push_pair(b);
+ break;
+ case Bytecodes::_i2b:
+ // Sign extend
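+    // (x << 24) >> 24 with an arithmetic right shift; e.g. 0x00000080 -> 0xFFFFFF80.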
+ a = pop();
+ a = _gvn.transform( new (C, 3) LShiftINode(a,_gvn.intcon(24)) );
+ a = _gvn.transform( new (C, 3) RShiftINode(a,_gvn.intcon(24)) );
+ push( a );
+ break;
+ case Bytecodes::_i2s:
+ a = pop();
+ a = _gvn.transform( new (C, 3) LShiftINode(a,_gvn.intcon(16)) );
+ a = _gvn.transform( new (C, 3) RShiftINode(a,_gvn.intcon(16)) );
+ push( a );
+ break;
+ case Bytecodes::_i2c:
+ a = pop();
+ push( _gvn.transform( new (C, 3) AndINode(a,_gvn.intcon(0xFFFF)) ) );
+ break;
+
+ case Bytecodes::_i2f:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2FNode(a) ) ;
+ c = precision_rounding(b);
+    push(c);
+ break;
+
+ case Bytecodes::_i2d:
+ a = pop();
+ b = _gvn.transform( new (C, 2) ConvI2DNode(a));
+ push_pair(b);
+ break;
+
+ case Bytecodes::_iinc: // Increment local
+ i = iter().get_index(); // Get local index
+ set_local( i, _gvn.transform( new (C, 3) AddINode( _gvn.intcon(iter().get_iinc_con()), local(i) ) ) );
+ break;
+
+ // Exit points of synchronized methods must have an unlock node
+ case Bytecodes::_return:
+ return_current(NULL);
+ break;
+
+ case Bytecodes::_ireturn:
+ case Bytecodes::_areturn:
+ case Bytecodes::_freturn:
+ return_current(pop());
+ break;
+ case Bytecodes::_lreturn:
+ return_current(pop_pair());
+ break;
+ case Bytecodes::_dreturn:
+ return_current(pop_pair());
+ break;
+
+ case Bytecodes::_athrow:
+    // A null exception oop results in a NullPointerException being thrown
+ do_null_check(peek(), T_OBJECT);
+ if (stopped()) return;
+ if (JvmtiExport::can_post_exceptions()) {
+ // "Full-speed throwing" is not necessary here,
+ // since we're notifying the VM on every throw.
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+ // Hook the thrown exception directly to subsequent handlers.
+ if (BailoutToInterpreterForThrows) {
+ // Keep method interpreted from now on.
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_make_not_compilable);
+ return;
+ }
+ add_exception_state(make_exception_state(peek()));
+ break;
+
+ case Bytecodes::_goto: // fall through
+ case Bytecodes::_goto_w: {
+ int target_bci = (bc() == Bytecodes::_goto) ? iter().get_dest() : iter().get_far_dest();
+
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(target_bci);
+
+ // Update method data
+ profile_taken_branch(target_bci);
+
+ // Merge the current control into the target basic block
+ merge(target_bci);
+
+ // See if we can get some profile data and hand it off to the next block
+ Block *target_block = block()->successor_for_bci(target_bci);
+ if (target_block->pred_count() != 1) break;
+ ciMethodData* methodData = method()->method_data();
+ if (!methodData->is_mature()) break;
+ ciProfileData* data = methodData->bci_to_data(bci());
+    assert( data->is_JumpData(), "need JumpData for taken branch" );
+ int taken = ((ciJumpData*)data)->taken();
+ taken = method()->scale_count(taken);
+ target_block->set_count(taken);
+ break;
+ }
+
+ case Bytecodes::_ifnull:
+ do_ifnull(BoolTest::eq);
+ break;
+ case Bytecodes::_ifnonnull:
+ do_ifnull(BoolTest::ne);
+ break;
+
+ case Bytecodes::_if_acmpeq: btest = BoolTest::eq; goto handle_if_acmp;
+ case Bytecodes::_if_acmpne: btest = BoolTest::ne; goto handle_if_acmp;
+ handle_if_acmp:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = pop();
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpPNode(b, a) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_ifeq: btest = BoolTest::eq; goto handle_ifxx;
+ case Bytecodes::_ifne: btest = BoolTest::ne; goto handle_ifxx;
+ case Bytecodes::_iflt: btest = BoolTest::lt; goto handle_ifxx;
+ case Bytecodes::_ifle: btest = BoolTest::le; goto handle_ifxx;
+ case Bytecodes::_ifgt: btest = BoolTest::gt; goto handle_ifxx;
+ case Bytecodes::_ifge: btest = BoolTest::ge; goto handle_ifxx;
+ handle_ifxx:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = _gvn.intcon(0);
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpINode(b, a) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_if_icmpeq: btest = BoolTest::eq; goto handle_if_icmp;
+ case Bytecodes::_if_icmpne: btest = BoolTest::ne; goto handle_if_icmp;
+ case Bytecodes::_if_icmplt: btest = BoolTest::lt; goto handle_if_icmp;
+ case Bytecodes::_if_icmple: btest = BoolTest::le; goto handle_if_icmp;
+ case Bytecodes::_if_icmpgt: btest = BoolTest::gt; goto handle_if_icmp;
+ case Bytecodes::_if_icmpge: btest = BoolTest::ge; goto handle_if_icmp;
+ handle_if_icmp:
+ // If this is a backwards branch in the bytecodes, add Safepoint
+ maybe_add_safepoint(iter().get_dest());
+ a = pop();
+ b = pop();
+ c = _gvn.transform( new (C, 3) CmpINode( b, a ) );
+ do_if(btest, c);
+ break;
+
+ case Bytecodes::_tableswitch:
+ do_tableswitch();
+ break;
+
+ case Bytecodes::_lookupswitch:
+ do_lookupswitch();
+ break;
+
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+ do_call();
+ break;
+ case Bytecodes::_checkcast:
+ do_checkcast();
+ break;
+ case Bytecodes::_instanceof:
+ do_instanceof();
+ break;
+ case Bytecodes::_anewarray:
+ do_anewarray();
+ break;
+ case Bytecodes::_newarray:
+ do_newarray((BasicType)iter().get_index());
+ break;
+ case Bytecodes::_multianewarray:
+ do_multianewarray();
+ break;
+ case Bytecodes::_new:
+ do_new();
+ break;
+
+ case Bytecodes::_jsr:
+ case Bytecodes::_jsr_w:
+ do_jsr();
+ break;
+
+ case Bytecodes::_ret:
+ do_ret();
+ break;
+
+
+ case Bytecodes::_monitorenter:
+ do_monitor_enter();
+ break;
+
+ case Bytecodes::_monitorexit:
+ do_monitor_exit();
+ break;
+
+ case Bytecodes::_breakpoint:
+ // Breakpoint set concurrently to compile
+ // %%% use an uncommon trap?
+ C->record_failure("breakpoint in method");
+ return;
+
+ default:
+#ifndef PRODUCT
+ map()->dump(99);
+#endif
+ tty->print("\nUnhandled bytecode %s\n", Bytecodes::name(bc()) );
+ ShouldNotReachHere();
+ }
+
+#ifndef PRODUCT
+ IdealGraphPrinter *printer = IdealGraphPrinter::printer();
+ if(printer) {
+ char buffer[256];
+ sprintf(buffer, "Bytecode %d: %s", bci(), Bytecodes::name(bc()));
+ bool old = printer->traverse_outs();
+ printer->set_traverse_outs(true);
+ printer->print_method(C, buffer, 3);
+ printer->set_traverse_outs(old);
+ }
+#endif
+}
diff --git a/src/share/vm/opto/parse3.cpp b/src/share/vm/opto/parse3.cpp
new file mode 100644
index 000000000..d32053ce2
--- /dev/null
+++ b/src/share/vm/opto/parse3.cpp
@@ -0,0 +1,463 @@
+/*
+ * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parse3.cpp.incl"
+
+//=============================================================================
+// Helper methods for _get* and _put* bytecodes
+//=============================================================================
+bool Parse::static_field_ok_in_clinit(ciField *field, ciMethod *method) {
+ // Could be the field_holder's <clinit> method, or <clinit> for a subklass.
+  // Better to check now than to deoptimize as soon as the compiled code executes
+ assert( field->is_static(), "Only check if field is static");
+ // is_being_initialized() is too generous. It allows access to statics
+ // by threads that are not running the <clinit> before the <clinit> finishes.
+ // return field->holder()->is_being_initialized();
+
+ // The following restriction is correct but conservative.
+ // It is also desirable to allow compilation of methods called from <clinit>
+ // but this generated code will need to be made safe for execution by
+ // other threads, or the transition from interpreted to compiled code would
+ // need to be guarded.
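+  // Concretely, the check below accepts access to Foo's statics from
+  // Foo.<clinit> (or a subclass's <clinit>) and from a constructor of Foo or
+  // of a subclass; access from an unrelated class is rejected even while Foo
+  // is still being initialized.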
+ ciInstanceKlass *field_holder = field->holder();
+
+ bool access_OK = false;
+ if (method->holder()->is_subclass_of(field_holder)) {
+ if (method->is_static()) {
+ if (method->name() == ciSymbol::class_initializer_name()) {
+ // OK to access static fields inside initializer
+ access_OK = true;
+ }
+ } else {
+ if (method->name() == ciSymbol::object_initializer_name()) {
+ // It's also OK to access static fields inside a constructor,
+ // because any thread calling the constructor must first have
+ // synchronized on the class by executing a '_new' bytecode.
+ access_OK = true;
+ }
+ }
+ }
+
+ return access_OK;
+
+}
+
+
+void Parse::do_field_access(bool is_get, bool is_field) {
+ bool will_link;
+ ciField* field = iter().get_field(will_link);
+ assert(will_link, "getfield: typeflow responsibility");
+
+ ciInstanceKlass* field_holder = field->holder();
+
+ if (is_field == field->is_static()) {
+ // Interpreter will throw java_lang_IncompatibleClassChangeError
+ // Check this before allowing <clinit> methods to access static fields
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+
+ if (!is_field && !field_holder->is_initialized()) {
+ if (!static_field_ok_in_clinit(field, method())) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ NULL, "!static_field_ok_in_clinit");
+ return;
+ }
+ }
+
+ assert(field->will_link(method()->holder(), bc()), "getfield: typeflow responsibility");
+
+ // Note: We do not check for an unloaded field type here any more.
+
+ // Generate code for the object pointer.
+ Node* obj;
+ if (is_field) {
+ int obj_depth = is_get ? 0 : field->type()->size();
+ obj = do_null_check(peek(obj_depth), T_OBJECT);
+ // Compile-time detect of null-exception?
+ if (stopped()) return;
+
+ const TypeInstPtr *tjp = TypeInstPtr::make(TypePtr::NotNull, iter().get_declared_field_holder());
+ assert(_gvn.type(obj)->higher_equal(tjp), "cast_up is no longer needed");
+
+ if (is_get) {
+ --_sp; // pop receiver before getting
+ do_get_xxx(tjp, obj, field, is_field);
+ } else {
+ do_put_xxx(tjp, obj, field, is_field);
+ --_sp; // pop receiver after putting
+ }
+ } else {
+ const TypeKlassPtr* tkp = TypeKlassPtr::make(field_holder);
+ obj = _gvn.makecon(tkp);
+ if (is_get) {
+ do_get_xxx(tkp, obj, field, is_field);
+ } else {
+ do_put_xxx(tkp, obj, field, is_field);
+ }
+ }
+}
+
+
+void Parse::do_get_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field) {
+ // Does this field have a constant value? If so, just push the value.
+ if (field->is_constant() && push_constant(field->constant_value())) return;
+
+ ciType* field_klass = field->type();
+ bool is_vol = field->is_volatile();
+
+ // Compute address and memory type.
+ int offset = field->offset_in_bytes();
+ const TypePtr* adr_type = C->alias_type(field)->adr_type();
+ Node *adr = basic_plus_adr(obj, obj, offset);
+ BasicType bt = field->layout_type();
+
+ // Build the resultant type of the load
+ const Type *type;
+
+ bool must_assert_null = false;
+
+ if( bt == T_OBJECT ) {
+ if (!field->type()->is_loaded()) {
+ type = TypeInstPtr::BOTTOM;
+ must_assert_null = true;
+ } else if (field->is_constant()) {
+ // This can happen if the constant oop is non-perm.
+ ciObject* con = field->constant_value().as_object();
+ // Do not "join" in the previous type; it doesn't add value,
+ // and may yield a vacuous result if the field is of interface type.
+ type = TypeOopPtr::make_from_constant(con)->isa_oopptr();
+ assert(type != NULL, "field singleton type must be consistent");
+ } else {
+ type = TypeOopPtr::make_from_klass(field_klass->as_klass());
+ }
+ } else {
+ type = Type::get_const_basic_type(bt);
+ }
+ // Build the load.
+ Node* ld = make_load(NULL, adr, type, bt, adr_type, is_vol);
+
+ // Adjust Java stack
+ if (type2size[bt] == 1)
+ push(ld);
+ else
+ push_pair(ld);
+
+ if (must_assert_null) {
+ // Do not take a trap here. It's possible that the program
+ // will never load the field's class, and will happily see
+ // null values in this field forever. Don't stumble into a
+ // trap for such a program, or we might get a long series
+ // of useless recompilations. (Or, we might load a class
+ // which should not be loaded.) If we ever see a non-null
+ // value, we will then trap and recompile. (The trap will
+ // not need to mention the class index, since the class will
+ // already have been loaded if we ever see a non-null value.)
+ // uncommon_trap(iter().get_field_signature_index());
+#ifndef PRODUCT
+ if (PrintOpto && (Verbose || WizardMode)) {
+ method()->print_name(); tty->print_cr(" asserting nullness of field at bci: %d", bci());
+ }
+#endif
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='field' klass='%d'",
+ C->log()->identify(field->type()));
+ }
+ // If there is going to be a trap, put it at the next bytecode:
+ set_bci(iter().next_bci());
+ do_null_assert(peek(), T_OBJECT);
+ set_bci(iter().cur_bci()); // put it back
+ }
+
+ // If reference is volatile, prevent following memory ops from
+ // floating up past the volatile read. Also prevents commoning
+ // another volatile read.
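+  // The emitted shape for a volatile getfield is thus Load -> MemBarAcquire,
+  // the counterpart of the Release/Volatile barriers placed around volatile
+  // stores in do_put_xxx() below.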
+ if (field->is_volatile()) {
+ // Memory barrier includes bogus read of value to force load BEFORE membar
+ insert_mem_bar(Op_MemBarAcquire, ld);
+ }
+}
+
+void Parse::do_put_xxx(const TypePtr* obj_type, Node* obj, ciField* field, bool is_field) {
+ bool is_vol = field->is_volatile();
+ // If reference is volatile, prevent following memory ops from
+ // floating down past the volatile write. Also prevents commoning
+ // another volatile read.
+ if (is_vol) insert_mem_bar(Op_MemBarRelease);
+
+ // Compute address and memory type.
+ int offset = field->offset_in_bytes();
+ const TypePtr* adr_type = C->alias_type(field)->adr_type();
+ Node* adr = basic_plus_adr(obj, obj, offset);
+ BasicType bt = field->layout_type();
+ // Value to be stored
+ Node* val = type2size[bt] == 1 ? pop() : pop_pair();
+ // Round doubles before storing
+ if (bt == T_DOUBLE) val = dstore_rounding(val);
+
+ // Store the value.
+ Node* store;
+ if (bt == T_OBJECT) {
+ const TypePtr* field_type;
+ if (!field->type()->is_loaded()) {
+ field_type = TypeInstPtr::BOTTOM;
+ } else {
+ field_type = TypeOopPtr::make_from_klass(field->type()->as_klass());
+ }
+ store = store_oop_to_object( control(), obj, adr, adr_type, val, field_type, bt);
+ } else {
+ store = store_to_memory( control(), adr, val, bt, adr_type, is_vol );
+ }
+
+  // If reference is volatile, prevent following volatile ops from
+ // floating up before the volatile write.
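+  // The overall shape built here is:
+  //   MemBarRelease; Store; MemBarVolatile(this field's index);
+  //   MemBarVolatile(AliasIdxBot); MemBarVolatile(each other volatile index).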
+ if (is_vol) {
+ // First place the specific membar for THIS volatile index. This first
+ // membar is dependent on the store, keeping any other membars generated
+ // below from floating up past the store.
+ int adr_idx = C->get_alias_index(adr_type);
+ insert_mem_bar_volatile(Op_MemBarVolatile, adr_idx);
+
+ // Now place a membar for AliasIdxBot for the unknown yet-to-be-parsed
+ // volatile alias indices. Skip this if the membar is redundant.
+ if (adr_idx != Compile::AliasIdxBot) {
+ insert_mem_bar_volatile(Op_MemBarVolatile, Compile::AliasIdxBot);
+ }
+
+ // Finally, place alias-index-specific membars for each volatile index
+ // that isn't the adr_idx membar. Typically there's only 1 or 2.
+ for( int i = Compile::AliasIdxRaw; i < C->num_alias_types(); i++ ) {
+ if (i != adr_idx && C->alias_type(i)->is_volatile()) {
+ insert_mem_bar_volatile(Op_MemBarVolatile, i);
+ }
+ }
+ }
+
+ // If the field is final, the rules of Java say we are in <init> or <clinit>.
+ // Note the presence of writes to final non-static fields, so that we
+ // can insert a memory barrier later on to keep the writes from floating
+ // out of the constructor.
+ if (is_field && field->is_final()) {
+ set_wrote_final(true);
+ }
+}
+
+
+bool Parse::push_constant(ciConstant constant) {
+ switch (constant.basic_type()) {
+ case T_BOOLEAN: push( intcon(constant.as_boolean()) ); break;
+ case T_INT: push( intcon(constant.as_int()) ); break;
+ case T_CHAR: push( intcon(constant.as_char()) ); break;
+ case T_BYTE: push( intcon(constant.as_byte()) ); break;
+ case T_SHORT: push( intcon(constant.as_short()) ); break;
+ case T_FLOAT: push( makecon(TypeF::make(constant.as_float())) ); break;
+ case T_DOUBLE: push_pair( makecon(TypeD::make(constant.as_double())) ); break;
+ case T_LONG: push_pair( longcon(constant.as_long()) ); break;
+ case T_ARRAY:
+ case T_OBJECT: {
+ // the oop is in perm space if the ciObject "has_encoding"
+ ciObject* oop_constant = constant.as_object();
+ if (oop_constant->is_null_object()) {
+ push( zerocon(T_OBJECT) );
+ break;
+ } else if (oop_constant->has_encoding()) {
+ push( makecon(TypeOopPtr::make_from_constant(oop_constant)) );
+ break;
+ } else {
+ // we cannot inline the oop, but we can use it later to narrow a type
+ return false;
+ }
+ }
+ case T_ILLEGAL: {
+ // Invalid ciConstant returned due to OutOfMemoryError in the CI
+ assert(C->env()->failing(), "otherwise should not see this");
+ // These always occur because of object types; we are going to
+ // bail out anyway, so make the stack depths match up
+ push( zerocon(T_OBJECT) );
+ return false;
+ }
+ default:
+ ShouldNotReachHere();
+ return false;
+ }
+
+ // success
+ return true;
+}
+
+
+
+//=============================================================================
+void Parse::do_anewarray() {
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+  // Uncommon-trap if the class the array will contain is not loaded: we need
+  // the loaded class for the rest of the graph, but we must not trigger
+  // initialization of the container class (see the Java spec).
+ assert(will_link, "anewarray: typeflow responsibility");
+
+ ciObjArrayKlass* array_klass = ciObjArrayKlass::make(klass);
+ // Check that array_klass object is loaded
+ if (!array_klass->is_loaded()) {
+ // Generate uncommon_trap for unloaded array_class
+ uncommon_trap(Deoptimization::Reason_unloaded,
+ Deoptimization::Action_reinterpret,
+ array_klass);
+ return;
+ }
+
+ kill_dead_locals();
+
+ const TypeKlassPtr* array_klass_type = TypeKlassPtr::make(array_klass);
+ Node* count_val = pop();
+ Node* obj = new_array(makecon(array_klass_type), count_val);
+ push(obj);
+}
+
+
+void Parse::do_newarray(BasicType elem_type) {
+ kill_dead_locals();
+
+ Node* count_val = pop();
+ const TypeKlassPtr* array_klass = TypeKlassPtr::make(ciTypeArrayKlass::make(elem_type));
+ Node* obj = new_array(makecon(array_klass), count_val);
+ // Push resultant oop onto stack
+ push(obj);
+}
+
+// Expand simple expressions like new int[3][5] and new Object[2][nonConLen].
+// Also handle the degenerate 1-dimensional case of anewarray.
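+// E.g. for new Object[2][len]: allocate the length-2 outer array, then
+// allocate two Object[len] inner arrays and store each into the outer array
+// (only the non-final dimensions need to be compile-time constants).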
+Node* Parse::expand_multianewarray(ciArrayKlass* array_klass, Node* *lengths, int ndimensions) {
+ Node* length = lengths[0];
+ assert(length != NULL, "");
+ Node* array = new_array(makecon(TypeKlassPtr::make(array_klass)), length);
+ if (ndimensions > 1) {
+ jint length_con = find_int_con(length, -1);
+ guarantee(length_con >= 0, "non-constant multianewarray");
+ ciArrayKlass* array_klass_1 = array_klass->as_obj_array_klass()->element_klass()->as_array_klass();
+ const TypePtr* adr_type = TypeAryPtr::OOPS;
+ const Type* elemtype = _gvn.type(array)->is_aryptr()->elem();
+ const intptr_t header = arrayOopDesc::base_offset_in_bytes(T_OBJECT);
+ for (jint i = 0; i < length_con; i++) {
+ Node* elem = expand_multianewarray(array_klass_1, &lengths[1], ndimensions-1);
+ intptr_t offset = header + ((intptr_t)i << LogBytesPerWord);
+ Node* eaddr = basic_plus_adr(array, offset);
+ store_oop_to_array(control(), array, eaddr, adr_type, elem, elemtype, T_OBJECT);
+ }
+ }
+ return array;
+}
+
+void Parse::do_multianewarray() {
+ int ndimensions = iter().get_dimensions();
+
+ // the m-dimensional array
+ bool will_link;
+ ciArrayKlass* array_klass = iter().get_klass(will_link)->as_array_klass();
+ assert(will_link, "multianewarray: typeflow responsibility");
+
+ // Note: Array classes are always initialized; no is_initialized check.
+
+ enum { MAX_DIMENSION = 5 };
+ if (ndimensions > MAX_DIMENSION || ndimensions <= 0) {
+ uncommon_trap(Deoptimization::Reason_unhandled,
+ Deoptimization::Action_none);
+ return;
+ }
+
+ kill_dead_locals();
+
+ // get the lengths from the stack (first dimension is on top)
+ Node* length[MAX_DIMENSION+1];
+ length[ndimensions] = NULL; // terminating null for make_runtime_call
+ int j;
+ for (j = ndimensions-1; j >= 0 ; j--) length[j] = pop();
+
+ // The original expression was of this form: new T[length0][length1]...
+ // It is often the case that the lengths are small (except the last).
+ // If that happens, use the fast 1-d creator a constant number of times.
+ const jint expand_limit = MIN2((juint)MultiArrayExpandLimit, (juint)100);
+ jint expand_count = 1; // count of allocations in the expansion
+ jint expand_fanout = 1; // running total fanout
+ for (j = 0; j < ndimensions-1; j++) {
+ jint dim_con = find_int_con(length[j], -1);
+ expand_fanout *= dim_con;
+ expand_count += expand_fanout; // count the level-J sub-arrays
+ if (dim_con < 0
+ || dim_con > expand_limit
+ || expand_count > expand_limit) {
+ expand_count = 0;
+ break;
+ }
+ }
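+  // E.g. for new T[2][3][len] the loop sees dim_con 2 then 3, so expand_count
+  // grows to 1 + 2 + 2*3 = 9 sub-array allocations; if that exceeds
+  // expand_limit, or a non-final dimension is not a constant, expand_count is
+  // zeroed and we fall back to the runtime call below.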
+
+  // Use the inline [a]newarray expansion (instead of the multianewarray
+  // runtime call) if there is only one dimension, or if all non-final
+  // dimensions are small constants.
+ if (expand_count == 1 || (1 <= expand_count && expand_count <= expand_limit)) {
+ Node* obj = expand_multianewarray(array_klass, &length[0], ndimensions);
+ push(obj);
+ return;
+ }
+
+ address fun = NULL;
+ switch (ndimensions) {
+ //case 1: Actually, there is no case 1. It's handled by new_array.
+ case 2: fun = OptoRuntime::multianewarray2_Java(); break;
+ case 3: fun = OptoRuntime::multianewarray3_Java(); break;
+ case 4: fun = OptoRuntime::multianewarray4_Java(); break;
+ case 5: fun = OptoRuntime::multianewarray5_Java(); break;
+ default: ShouldNotReachHere();
+ };
+
+ Node* c = make_runtime_call(RC_NO_LEAF | RC_NO_IO,
+ OptoRuntime::multianewarray_Type(ndimensions),
+ fun, NULL, TypeRawPtr::BOTTOM,
+ makecon(TypeKlassPtr::make(array_klass)),
+ length[0], length[1], length[2],
+ length[3], length[4]);
+ Node* res = _gvn.transform(new (C, 1) ProjNode(c, TypeFunc::Parms));
+
+ const Type* type = TypeOopPtr::make_from_klass_raw(array_klass);
+
+ // Improve the type: We know it's not null, exact, and of a given length.
+ type = type->is_ptr()->cast_to_ptr_type(TypePtr::NotNull);
+ type = type->is_aryptr()->cast_to_exactness(true);
+
+ const TypeInt* ltype = _gvn.find_int_type(length[0]);
+ if (ltype != NULL)
+ type = type->is_aryptr()->cast_to_size(ltype);
+
+ // We cannot sharpen the nested sub-arrays, since the top level is mutable.
+
+ Node* cast = _gvn.transform( new (C, 2) CheckCastPPNode(control(), res, type) );
+ push(cast);
+
+ // Possible improvements:
+ // - Make a fast path for small multi-arrays. (W/ implicit init. loops.)
+ // - Issue CastII against length[*] values, to TypeInt::POS.
+}
diff --git a/src/share/vm/opto/parseHelper.cpp b/src/share/vm/opto/parseHelper.cpp
new file mode 100644
index 000000000..d34ca998c
--- /dev/null
+++ b/src/share/vm/opto/parseHelper.cpp
@@ -0,0 +1,520 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_parseHelper.cpp.incl"
+
+//------------------------------make_dtrace_method_entry_exit ----------------
+// Dtrace -- record entry or exit of a method if compiled with dtrace support
+void GraphKit::make_dtrace_method_entry_exit(ciMethod* method, bool is_entry) {
+ const TypeFunc *call_type = OptoRuntime::dtrace_method_entry_exit_Type();
+ address call_address = is_entry ? CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_entry) :
+ CAST_FROM_FN_PTR(address, SharedRuntime::dtrace_method_exit);
+ const char *call_name = is_entry ? "dtrace_method_entry" : "dtrace_method_exit";
+
+ // Get base of thread-local storage area
+ Node* thread = _gvn.transform( new (C, 1) ThreadLocalNode() );
+
+ // Get method
+ const TypeInstPtr* method_type = TypeInstPtr::make(TypePtr::Constant, method->klass(), true, method, 0);
+ Node *method_node = _gvn.transform( new (C, 1) ConPNode(method_type) );
+
+ kill_dead_locals();
+
+ // For some reason, this call reads only raw memory.
+ const TypePtr* raw_adr_type = TypeRawPtr::BOTTOM;
+ make_runtime_call(RC_LEAF | RC_NARROW_MEM,
+ call_type, call_address,
+ call_name, raw_adr_type,
+ thread, method_node);
+}
+
+
+//=============================================================================
+//------------------------------do_checkcast-----------------------------------
+void Parse::do_checkcast() {
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+ Node *obj = peek();
+
+ // Throw uncommon trap if class is not loaded or the value we are casting
+ // _from_ is not loaded, and value is not null. If the value _is_ NULL,
+ // then the checkcast does nothing.
+ const TypeInstPtr *tp = _gvn.type(obj)->isa_instptr();
+ if (!will_link || (tp && !tp->is_loaded())) {
+ if (C->log() != NULL) {
+ if (!will_link) {
+ C->log()->elem("assert_null reason='checkcast' klass='%d'",
+ C->log()->identify(klass));
+ }
+ if (tp && !tp->is_loaded()) {
+ // %%% Cannot happen?
+ C->log()->elem("assert_null reason='checkcast source' klass='%d'",
+ C->log()->identify(tp->klass()));
+ }
+ }
+ do_null_assert(obj, T_OBJECT);
+ assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
+ if (!stopped()) {
+ profile_null_checkcast();
+ }
+ return;
+ }
+
+ Node *res = gen_checkcast(obj, makecon(TypeKlassPtr::make(klass)) );
+
+ // Pop from stack AFTER gen_checkcast because it can uncommon trap and
+ // the debug info has to be correct.
+ pop();
+ push(res);
+}
+
+
+//------------------------------do_instanceof----------------------------------
+void Parse::do_instanceof() {
+ if (stopped()) return;
+ // We would like to return false if class is not loaded, emitting a
+ // dependency, but Java requires instanceof to load its operand.
+
+ // Throw uncommon trap if class is not loaded
+ bool will_link;
+ ciKlass* klass = iter().get_klass(will_link);
+
+ if (!will_link) {
+ if (C->log() != NULL) {
+ C->log()->elem("assert_null reason='instanceof' klass='%d'",
+ C->log()->identify(klass));
+ }
+ do_null_assert(peek(), T_OBJECT);
+ assert( stopped() || _gvn.type(peek())->higher_equal(TypePtr::NULL_PTR), "what's left behind is null" );
+ if (!stopped()) {
+ // The object is now known to be null.
+ // Shortcut the effect of gen_instanceof and return "false" directly.
+ pop(); // pop the null
+ push(_gvn.intcon(0)); // push false answer
+ }
+ return;
+ }
+
+ // Push the bool result back on stack
+ push( gen_instanceof( pop(), makecon(TypeKlassPtr::make(klass)) ) );
+}
+
+//------------------------------array_store_check------------------------------
+// pull array from stack and check that the store is valid
+void Parse::array_store_check() {
+
+ // Shorthand access to array store elements
+ Node *obj = stack(_sp-1);
+ Node *idx = stack(_sp-2);
+ Node *ary = stack(_sp-3);
+
+ if (_gvn.type(obj) == TypePtr::NULL_PTR) {
+ // There's never a type check on null values.
+ // This cutout lets us avoid the uncommon_trap(Reason_array_check)
+ // below, which turns into a performance liability if the
+ // gen_checkcast folds up completely.
+ return;
+ }
+
+ // Extract the array klass type
+ int klass_offset = oopDesc::klass_offset_in_bytes();
+ Node* p = basic_plus_adr( ary, ary, klass_offset );
+ // p's type is array-of-OOPS plus klass_offset
+ Node* array_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p, TypeInstPtr::KLASS));
+ // Get the array klass
+ const TypeKlassPtr *tak = _gvn.type(array_klass)->is_klassptr();
+
+ // array_klass's type is generally INexact array-of-oop. Heroically
+ // cast the array klass to EXACT array and uncommon-trap if the cast
+ // fails.
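+  // E.g. if the array's static type is Object[], speculate that its klass is
+  // exactly Object[] (not a subtype such as String[]); the element klass is
+  // then exactly Object and the gen_checkcast below folds away. Seeing a
+  // String[] here at runtime takes the uncommon trap instead.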
+ bool always_see_exact_class = false;
+ if (MonomorphicArrayCheck
+ && !too_many_traps(Deoptimization::Reason_array_check)) {
+ always_see_exact_class = true;
+ // (If no MDO at all, hope for the best, until a trap actually occurs.)
+ }
+
+  // Is the array klass exactly its defined type?
+ if (always_see_exact_class && !tak->klass_is_exact()) {
+ // Make a constant out of the inexact array klass
+ const TypeKlassPtr *extak = tak->cast_to_exactness(true)->is_klassptr();
+ Node* con = makecon(extak);
+ Node* cmp = _gvn.transform(new (C, 3) CmpPNode( array_klass, con ));
+ Node* bol = _gvn.transform(new (C, 2) BoolNode( cmp, BoolTest::eq ));
+ Node* ctrl= control();
+ { BuildCutout unless(this, bol, PROB_MAX);
+ uncommon_trap(Deoptimization::Reason_array_check,
+ Deoptimization::Action_maybe_recompile,
+ tak->klass());
+ }
+ if (stopped()) { // MUST uncommon-trap?
+ set_control(ctrl); // Then Don't Do It, just fall into the normal checking
+ } else { // Cast array klass to exactness:
+ // Use the exact constant value we know it is.
+ replace_in_map(array_klass,con);
+ CompileLog* log = C->log();
+ if (log != NULL) {
+ log->elem("cast_up reason='monomorphic_array' from='%d' to='(exact)'",
+ log->identify(tak->klass()));
+ }
+ array_klass = con; // Use cast value moving forward
+ }
+ }
+
+ // Come here for polymorphic array klasses
+
+ // Extract the array element class
+ int element_klass_offset = objArrayKlass::element_klass_offset_in_bytes() + sizeof(oopDesc);
+ Node *p2 = basic_plus_adr(array_klass, array_klass, element_klass_offset);
+ Node *a_e_klass = _gvn.transform(new (C, 3) LoadKlassNode(0, immutable_memory(), p2, tak));
+
+ // Check (the hard way) and throw if not a subklass.
+ // Result is ignored, we just need the CFG effects.
+ gen_checkcast( obj, a_e_klass );
+}
+
+
+//------------------------------do_new-----------------------------------------
+void Parse::do_new() {
+ kill_dead_locals();
+
+ bool will_link;
+ ciInstanceKlass* klass = iter().get_klass(will_link)->as_instance_klass();
+ assert(will_link, "_new: typeflow responsibility");
+
+ // Should initialize, or throw an InstantiationError?
+ if (!klass->is_initialized() ||
+ klass->is_abstract() || klass->is_interface() ||
+ klass->name() == ciSymbol::java_lang_Class() ||
+ iter().is_unresolved_klass()) {
+ uncommon_trap(Deoptimization::Reason_uninitialized,
+ Deoptimization::Action_reinterpret,
+ klass);
+ return;
+ }
+
+ Node* kls = makecon(TypeKlassPtr::make(klass));
+ Node* obj = new_instance(kls);
+
+ // Push resultant oop onto stack
+ push(obj);
+}
+
+#ifndef PRODUCT
+//------------------------------dump_map_adr_mem-------------------------------
+// Debug dump of the mapping from address types to MergeMemNode indices.
+void Parse::dump_map_adr_mem() const {
+ tty->print_cr("--- Mapping from address types to memory Nodes ---");
+ MergeMemNode *mem = map() == NULL ? NULL : (map()->memory()->is_MergeMem() ?
+ map()->memory()->as_MergeMem() : NULL);
+ for (uint i = 0; i < (uint)C->num_alias_types(); i++) {
+ C->alias_type(i)->print_on(tty);
+ tty->print("\t");
+ // Node mapping, if any
+ if (mem && i < mem->req() && mem->in(i) && mem->in(i) != mem->empty_memory()) {
+ mem->in(i)->dump();
+ } else {
+ tty->cr();
+ }
+ }
+}
+
+#endif
+
+
+//=============================================================================
+//
+// parser methods for profiling
+
+
+//----------------------test_counter_against_threshold ------------------------
+void Parse::test_counter_against_threshold(Node* cnt, int limit) {
+ // Test the counter against the limit and uncommon trap if greater.
+
+ // This code is largely copied from the range check code in
+ // array_addressing()
+
+ // Test invocation count vs threshold
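+  // Note the compare is unsigned (CmpU): a counter value with the sign bit
+  // set compares above any positive limit and falls into the trap below.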
+ Node *threshold = makecon(TypeInt::make(limit));
+ Node *chk = _gvn.transform( new (C, 3) CmpUNode( cnt, threshold) );
+ BoolTest::mask btest = BoolTest::lt;
+ Node *tst = _gvn.transform( new (C, 2) BoolNode( chk, btest) );
+ // Branch to failure if threshold exceeded
+ { BuildCutout unless(this, tst, PROB_ALWAYS);
+ uncommon_trap(Deoptimization::Reason_age,
+ Deoptimization::Action_maybe_recompile);
+ }
+}
+
+//----------------------increment_and_test_invocation_counter-------------------
+void Parse::increment_and_test_invocation_counter(int limit) {
+ if (!count_invocations()) return;
+
+ // Get the methodOop node.
+ const TypePtr* adr_type = TypeOopPtr::make_from_constant(method());
+ Node *methodOop_node = makecon(adr_type);
+
+ // Load the interpreter_invocation_counter from the methodOop.
+ int offset = methodOopDesc::interpreter_invocation_counter_offset_in_bytes();
+ Node* adr_node = basic_plus_adr(methodOop_node, methodOop_node, offset);
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+
+ test_counter_against_threshold(cnt, limit);
+
+ // Add one to the counter and store
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(1)));
+ store_to_memory( NULL, adr_node, incr, T_INT, adr_type );
+}
+
+//----------------------------method_data_addressing---------------------------
+Node* Parse::method_data_addressing(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, Node* idx, uint stride) {
+ // Get offset within methodDataOop of the data array
+ ByteSize data_offset = methodDataOopDesc::data_offset();
+
+ // Get cell offset of the ProfileData within data array
+ int cell_offset = md->dp_to_di(data->dp());
+
+ // Add in counter_offset, the # of bytes into the ProfileData of counter or flag
+ int offset = in_bytes(data_offset) + cell_offset + in_bytes(counter_offset);
+
+ const TypePtr* adr_type = TypeOopPtr::make_from_constant(md);
+ Node* mdo = makecon(adr_type);
+ Node* ptr = basic_plus_adr(mdo, mdo, offset);
+
+ if (stride != 0) {
+ Node* str = _gvn.MakeConX(stride);
+ Node* scale = _gvn.transform( new (C, 3) MulXNode( idx, str ) );
+ ptr = _gvn.transform( new (C, 4) AddPNode( mdo, ptr, scale ) );
+ }
+
+ return ptr;
+}
+
+//--------------------------increment_md_counter_at----------------------------
+void Parse::increment_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, Node* idx, uint stride) {
+ Node* adr_node = method_data_addressing(md, data, counter_offset, idx, stride);
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) AddINode(cnt, _gvn.intcon(DataLayout::counter_increment)));
+ store_to_memory(NULL, adr_node, incr, T_INT, adr_type );
+}
+
+//--------------------------test_for_osr_md_counter_at-------------------------
+void Parse::test_for_osr_md_counter_at(ciMethodData* md, ciProfileData* data, ByteSize counter_offset, int limit) {
+ Node* adr_node = method_data_addressing(md, data, counter_offset);
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* cnt = make_load(NULL, adr_node, TypeInt::INT, T_INT, adr_type);
+
+ test_counter_against_threshold(cnt, limit);
+}
+
+//-------------------------------set_md_flag_at--------------------------------
+void Parse::set_md_flag_at(ciMethodData* md, ciProfileData* data, int flag_constant) {
+ Node* adr_node = method_data_addressing(md, data, DataLayout::flags_offset());
+
+ const TypePtr* adr_type = _gvn.type(adr_node)->is_ptr();
+ Node* flags = make_load(NULL, adr_node, TypeInt::BYTE, T_BYTE, adr_type);
+ Node* incr = _gvn.transform(new (C, 3) OrINode(flags, _gvn.intcon(flag_constant)));
+ store_to_memory(NULL, adr_node, incr, T_BYTE, adr_type);
+}
+
+//----------------------------profile_taken_branch-----------------------------
+void Parse::profile_taken_branch(int target_bci, bool force_update) {
+ // This is a potential osr_site if we have a backedge.
+ int cur_bci = bci();
+ bool osr_site =
+ (target_bci <= cur_bci) && count_invocations() && UseOnStackReplacement;
+
+ // If we are going to OSR, restart at the target bytecode.
+ set_bci(target_bci);
+
+  // To do: factor out the limit calculations below. These duplicate
+ // the similar limit calculations in the interpreter.
+
+ if (method_data_update() || force_update) {
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(cur_bci);
+ assert(data->is_JumpData(), "need JumpData for taken branch");
+ increment_md_counter_at(md, data, JumpData::taken_offset());
+ }
+
+ // In the new tiered system this is all we need to do. In the old
+  // (C2-based) tiered system we must also execute the code below.
+#ifndef TIERED
+ if (method_data_update()) {
+ ciMethodData* md = method()->method_data();
+ if (osr_site) {
+ ciProfileData* data = md->bci_to_data(cur_bci);
+ int limit = (CompileThreshold
+ * (OnStackReplacePercentage - InterpreterProfilePercentage)) / 100;
+ test_for_osr_md_counter_at(md, data, JumpData::taken_offset(), limit);
+ }
+ } else {
+ // With method data update off, use the invocation counter to trigger an
+ // OSR compilation, as done in the interpreter.
+ if (osr_site) {
+ int limit = (CompileThreshold * OnStackReplacePercentage) / 100;
+ increment_and_test_invocation_counter(limit);
+ }
+ }
+#endif // TIERED
+
+ // Restore the original bytecode.
+ set_bci(cur_bci);
+}
+
+//--------------------------profile_not_taken_branch---------------------------
+void Parse::profile_not_taken_branch(bool force_update) {
+
+ if (method_data_update() || force_update) {
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_BranchData(), "need BranchData for not taken branch");
+ increment_md_counter_at(md, data, BranchData::not_taken_offset());
+ }
+
+}
+
+//---------------------------------profile_call--------------------------------
+void Parse::profile_call(Node* receiver) {
+ if (!method_data_update()) return;
+
+ profile_generic_call();
+
+ switch (bc()) {
+ case Bytecodes::_invokevirtual:
+ case Bytecodes::_invokeinterface:
+ profile_receiver_type(receiver);
+ break;
+ case Bytecodes::_invokestatic:
+ case Bytecodes::_invokespecial:
+ break;
+ default: fatal("unexpected call bytecode");
+ }
+}
+
+//------------------------------profile_generic_call---------------------------
+void Parse::profile_generic_call() {
+ assert(method_data_update(), "must be generating profile code");
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+  assert(data->is_CounterData(), "need CounterData for generic call site");
+ increment_md_counter_at(md, data, CounterData::count_offset());
+}
+
+//-----------------------------profile_receiver_type---------------------------
+void Parse::profile_receiver_type(Node* receiver) {
+ assert(method_data_update(), "must be generating profile code");
+
+ // Skip if we aren't tracking receivers
+ if (TypeProfileWidth < 1) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_ReceiverTypeData(), "need ReceiverTypeData here");
+ ciReceiverTypeData* rdata = (ciReceiverTypeData*)data->as_ReceiverTypeData();
+
+ Node* method_data = method_data_addressing(md, rdata, in_ByteSize(0));
+
+ // Using an adr_type of TypePtr::BOTTOM to work around anti-dep problems.
+ // A better solution might be to use TypeRawPtr::BOTTOM with RC_NARROW_MEM.
+ make_runtime_call(RC_LEAF, OptoRuntime::profile_receiver_type_Type(),
+ CAST_FROM_FN_PTR(address,
+ OptoRuntime::profile_receiver_type_C),
+ "profile_receiver_type_C",
+ TypePtr::BOTTOM,
+ method_data, receiver);
+}
+
+//---------------------------------profile_ret---------------------------------
+void Parse::profile_ret(int target_bci) {
+ if (!method_data_update()) return;
+
+ // Skip if we aren't tracking ret targets
+ if (TypeProfileWidth < 1) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_RetData(), "need RetData for ret");
+ ciRetData* ret_data = (ciRetData*)data->as_RetData();
+
+  // Check whether the target_bci is already in the table
+ uint row;
+ bool table_full = true;
+ for (row = 0; row < ret_data->row_limit(); row++) {
+ int key = ret_data->bci(row);
+ table_full &= (key != RetData::no_bci);
+ if (key == target_bci) break;
+ }
+
+ if (row >= ret_data->row_limit()) {
+ // The target_bci was not found in the table.
+ if (!table_full) {
+ // XXX: Make slow call to update RetData
+ }
+ return;
+ }
+
+ // the target_bci is already in the table
+ increment_md_counter_at(md, data, RetData::bci_count_offset(row));
+}
+
+//--------------------------profile_null_checkcast----------------------------
+void Parse::profile_null_checkcast() {
+ // Set the null-seen flag, done in conjunction with the usual null check. We
+ // never unset the flag, so this is a one-way switch.
+ if (!method_data_update()) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_BitData(), "need BitData for checkcast");
+ set_md_flag_at(md, data, BitData::null_seen_byte_constant());
+}
+
+//-----------------------------profile_switch_case-----------------------------
+void Parse::profile_switch_case(int table_index) {
+ if (!method_data_update()) return;
+
+ ciMethodData* md = method()->method_data();
+ assert(md != NULL, "expected valid ciMethodData");
+
+ ciProfileData* data = md->bci_to_data(bci());
+ assert(data->is_MultiBranchData(), "need MultiBranchData for switch case");
+ if (table_index >= 0) {
+ increment_md_counter_at(md, data, MultiBranchData::case_count_offset(table_index));
+ } else {
+ increment_md_counter_at(md, data, MultiBranchData::default_count_offset());
+ }
+}
diff --git a/src/share/vm/opto/phase.cpp b/src/share/vm/opto/phase.cpp
new file mode 100644
index 000000000..5e046dab0
--- /dev/null
+++ b/src/share/vm/opto/phase.cpp
@@ -0,0 +1,164 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_phase.cpp.incl"
+
+#ifndef PRODUCT
+int Phase::_total_bytes_compiled = 0;
+
+elapsedTimer Phase::_t_totalCompilation;
+elapsedTimer Phase::_t_methodCompilation;
+elapsedTimer Phase::_t_stubCompilation;
+#endif
+
+// The following timers are used for LogCompilation
+elapsedTimer Phase::_t_parser;
+elapsedTimer Phase::_t_escapeAnalysis;
+elapsedTimer Phase::_t_optimizer;
+elapsedTimer Phase::_t_idealLoop;
+elapsedTimer Phase::_t_ccp;
+elapsedTimer Phase::_t_matcher;
+elapsedTimer Phase::_t_registerAllocation;
+elapsedTimer Phase::_t_output;
+
+#ifndef PRODUCT
+elapsedTimer Phase::_t_graphReshaping;
+elapsedTimer Phase::_t_scheduler;
+elapsedTimer Phase::_t_removeEmptyBlocks;
+elapsedTimer Phase::_t_macroExpand;
+elapsedTimer Phase::_t_peephole;
+elapsedTimer Phase::_t_codeGeneration;
+elapsedTimer Phase::_t_registerMethod;
+elapsedTimer Phase::_t_temporaryTimer1;
+elapsedTimer Phase::_t_temporaryTimer2;
+
+// Subtimers for _t_optimizer
+elapsedTimer Phase::_t_iterGVN;
+elapsedTimer Phase::_t_iterGVN2;
+
+// Subtimers for _t_registerAllocation
+elapsedTimer Phase::_t_ctorChaitin;
+elapsedTimer Phase::_t_buildIFGphysical;
+elapsedTimer Phase::_t_computeLive;
+elapsedTimer Phase::_t_regAllocSplit;
+elapsedTimer Phase::_t_postAllocCopyRemoval;
+elapsedTimer Phase::_t_fixupSpills;
+
+// Subtimers for _t_output
+elapsedTimer Phase::_t_instrSched;
+elapsedTimer Phase::_t_buildOopMaps;
+#endif
+
+//------------------------------Phase------------------------------------------
+Phase::Phase( PhaseNumber pnum ) : _pnum(pnum), C( pnum == Compiler ? NULL : Compile::current()) {
+  // Poll for requests from the shutdown mechanism to quiesce the compiler (4448539, 4448544).
+ // This is an effective place to poll, since the compiler is full of phases.
+ // In particular, every inlining site uses a recursively created Parse phase.
+ CompileBroker::maybe_block();
+}
+
+#ifndef PRODUCT
+static const double minimum_reported_time = 0.0001; // seconds
+static const double expected_method_compile_coverage = 0.97; // %
+static const double minimum_meaningful_method_compile = 2.00; // seconds
+
+void Phase::print_timers() {
+ tty->print_cr ("Accumulated compiler times:");
+ tty->print_cr ("---------------------------");
+ tty->print_cr (" Total compilation: %3.3f sec.", Phase::_t_totalCompilation.seconds());
+ tty->print (" method compilation : %3.3f sec", Phase::_t_methodCompilation.seconds());
+ tty->print ("/%d bytes",_total_bytes_compiled);
+ tty->print_cr (" (%3.0f bytes per sec) ", Phase::_total_bytes_compiled / Phase::_t_methodCompilation.seconds());
+ tty->print_cr (" stub compilation : %3.3f sec.", Phase::_t_stubCompilation.seconds());
+ tty->print_cr (" Phases:");
+ tty->print_cr (" parse : %3.3f sec", Phase::_t_parser.seconds());
+ if (DoEscapeAnalysis) {
+ tty->print_cr (" escape analysis : %3.3f sec", Phase::_t_escapeAnalysis.seconds());
+ }
+ tty->print_cr (" optimizer : %3.3f sec", Phase::_t_optimizer.seconds());
+ if( Verbose || WizardMode ) {
+ tty->print_cr (" iterGVN : %3.3f sec", Phase::_t_iterGVN.seconds());
+ tty->print_cr (" idealLoop : %3.3f sec", Phase::_t_idealLoop.seconds());
+ tty->print_cr (" ccp : %3.3f sec", Phase::_t_ccp.seconds());
+ tty->print_cr (" iterGVN2 : %3.3f sec", Phase::_t_iterGVN2.seconds());
+ tty->print_cr (" graphReshape : %3.3f sec", Phase::_t_graphReshaping.seconds());
+ double optimizer_subtotal = Phase::_t_iterGVN.seconds() +
+ Phase::_t_idealLoop.seconds() + Phase::_t_ccp.seconds() +
+ Phase::_t_graphReshaping.seconds();
+ double percent_of_optimizer = ((optimizer_subtotal == 0.0) ? 0.0 : (optimizer_subtotal / Phase::_t_optimizer.seconds() * 100.0));
+ tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", optimizer_subtotal, percent_of_optimizer);
+ }
+ tty->print_cr (" matcher : %3.3f sec", Phase::_t_matcher.seconds());
+ tty->print_cr (" scheduler : %3.3f sec", Phase::_t_scheduler.seconds());
+ tty->print_cr (" regalloc : %3.3f sec", Phase::_t_registerAllocation.seconds());
+ if( Verbose || WizardMode ) {
+ tty->print_cr (" ctorChaitin : %3.3f sec", Phase::_t_ctorChaitin.seconds());
+ tty->print_cr (" buildIFG : %3.3f sec", Phase::_t_buildIFGphysical.seconds());
+ tty->print_cr (" computeLive : %3.3f sec", Phase::_t_computeLive.seconds());
+ tty->print_cr (" regAllocSplit: %3.3f sec", Phase::_t_regAllocSplit.seconds());
+ tty->print_cr (" postAllocCopyRemoval: %3.3f sec", Phase::_t_postAllocCopyRemoval.seconds());
+ tty->print_cr (" fixupSpills : %3.3f sec", Phase::_t_fixupSpills.seconds());
+ double regalloc_subtotal = Phase::_t_ctorChaitin.seconds() +
+ Phase::_t_buildIFGphysical.seconds() + Phase::_t_computeLive.seconds() +
+ Phase::_t_regAllocSplit.seconds() + Phase::_t_fixupSpills.seconds() +
+ Phase::_t_postAllocCopyRemoval.seconds();
+ double percent_of_regalloc = ((regalloc_subtotal == 0.0) ? 0.0 : (regalloc_subtotal / Phase::_t_registerAllocation.seconds() * 100.0));
+ tty->print_cr (" subtotal : %3.3f sec, %3.2f %%", regalloc_subtotal, percent_of_regalloc);
+ }
+ tty->print_cr (" macroExpand : %3.3f sec", Phase::_t_macroExpand.seconds());
+ tty->print_cr (" removeEmpty : %3.3f sec", Phase::_t_removeEmptyBlocks.seconds());
+ tty->print_cr (" peephole : %3.3f sec", Phase::_t_peephole.seconds());
+ tty->print_cr (" codeGen : %3.3f sec", Phase::_t_codeGeneration.seconds());
+ tty->print_cr (" install_code : %3.3f sec", Phase::_t_registerMethod.seconds());
+ tty->print_cr (" ------------ : ----------");
+ double phase_subtotal = Phase::_t_parser.seconds() +
+ (DoEscapeAnalysis ? Phase::_t_escapeAnalysis.seconds() : 0.0) +
+ Phase::_t_optimizer.seconds() + Phase::_t_graphReshaping.seconds() +
+ Phase::_t_matcher.seconds() + Phase::_t_scheduler.seconds() +
+ Phase::_t_registerAllocation.seconds() + Phase::_t_removeEmptyBlocks.seconds() +
+ Phase::_t_macroExpand.seconds() + Phase::_t_peephole.seconds() +
+ Phase::_t_codeGeneration.seconds() + Phase::_t_registerMethod.seconds();
+ double percent_of_method_compile = ((phase_subtotal == 0.0) ? 0.0 : phase_subtotal / Phase::_t_methodCompilation.seconds()) * 100.0;
+ // counters inside Compile::CodeGen include time for adapters and stubs
+ // so phase-total can be greater than 100%
+ tty->print_cr (" total : %3.3f sec, %3.2f %%", phase_subtotal, percent_of_method_compile);
+
+ assert( percent_of_method_compile > expected_method_compile_coverage ||
+ phase_subtotal < minimum_meaningful_method_compile,
+ "Must account for method compilation");
+
+ if( Phase::_t_temporaryTimer1.seconds() > minimum_reported_time ) {
+ tty->cr();
+ tty->print_cr (" temporaryTimer1: %3.3f sec", Phase::_t_temporaryTimer1.seconds());
+ }
+ if( Phase::_t_temporaryTimer2.seconds() > minimum_reported_time ) {
+ tty->cr();
+ tty->print_cr (" temporaryTimer2: %3.3f sec", Phase::_t_temporaryTimer2.seconds());
+ }
+ tty->print_cr (" output : %3.3f sec", Phase::_t_output.seconds());
+ tty->print_cr (" isched : %3.3f sec", Phase::_t_instrSched.seconds());
+ tty->print_cr (" bldOopMaps: %3.3f sec", Phase::_t_buildOopMaps.seconds());
+}
+#endif
diff --git a/src/share/vm/opto/phase.hpp b/src/share/vm/opto/phase.hpp
new file mode 100644
index 000000000..bee7dfef7
--- /dev/null
+++ b/src/share/vm/opto/phase.hpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+
+//------------------------------Phase------------------------------------------
+// Most optimizations are done in Phases. Creating a phase does any long
+// running analysis required, and caches the analysis in internal data
+// structures. Later the analysis is queried using transform() calls to
+// guide transforming the program. When the Phase is deleted, so is any
+// cached analysis info. This basic Phase class mostly contains timing and
+// memory management code.
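+//
+// Typical use is as a scoped StackObj, e.g. (illustrative, with gvn being a
+// PhaseGVN*):  { PhaseIterGVN igvn(gvn); igvn.optimize(); }
+// The constructor performs the analysis, transform()/optimize() apply it, and
+// the cached info goes away when the phase leaves scope.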
+class Phase : public StackObj {
+public:
+ enum PhaseNumber {
+ Compiler, // Top-level compiler phase
+ Parser, // Parse bytecodes
+ Remove_Useless, // Remove useless nodes
+ Optimistic, // Optimistic analysis phase
+ GVN, // Pessimistic global value numbering phase
+ Ins_Select, // Instruction selection phase
+ Copy_Elimination, // Copy Elimination
+ Dead_Code_Elimination, // DCE and compress Nodes
+ Conditional_Constant, // Conditional Constant Propagation
+ CFG, // Build a CFG
+ DefUse, // Build Def->Use chains
+ Register_Allocation, // Register allocation, duh
+ LIVE, // Dragon-book LIVE range problem
+ Interference_Graph, // Building the IFG
+ Coalesce, // Coalescing copies
+ Conditional_CProp, // Conditional Constant Propagation
+ Ideal_Loop, // Find idealized trip-counted loops
+ Macro_Expand, // Expand macro nodes
+ Peephole, // Apply peephole optimizations
+ last_phase
+ };
+protected:
+ enum PhaseNumber _pnum; // Phase number (for stat gathering)
+
+#ifndef PRODUCT
+ static int _total_bytes_compiled;
+
+ // accumulated timers
+ static elapsedTimer _t_totalCompilation;
+ static elapsedTimer _t_methodCompilation;
+ static elapsedTimer _t_stubCompilation;
+#endif
+
+// The following timers are used for LogCompilation
+ static elapsedTimer _t_parser;
+ static elapsedTimer _t_escapeAnalysis;
+ static elapsedTimer _t_optimizer;
+ static elapsedTimer _t_idealLoop;
+ static elapsedTimer _t_ccp;
+ static elapsedTimer _t_matcher;
+ static elapsedTimer _t_registerAllocation;
+ static elapsedTimer _t_output;
+
+#ifndef PRODUCT
+ static elapsedTimer _t_graphReshaping;
+ static elapsedTimer _t_scheduler;
+ static elapsedTimer _t_removeEmptyBlocks;
+ static elapsedTimer _t_macroExpand;
+ static elapsedTimer _t_peephole;
+ static elapsedTimer _t_codeGeneration;
+ static elapsedTimer _t_registerMethod;
+ static elapsedTimer _t_temporaryTimer1;
+ static elapsedTimer _t_temporaryTimer2;
+
+// Subtimers for _t_optimizer
+ static elapsedTimer _t_iterGVN;
+ static elapsedTimer _t_iterGVN2;
+
+// Subtimers for _t_registerAllocation
+ static elapsedTimer _t_ctorChaitin;
+ static elapsedTimer _t_buildIFGphysical;
+ static elapsedTimer _t_computeLive;
+ static elapsedTimer _t_regAllocSplit;
+ static elapsedTimer _t_postAllocCopyRemoval;
+ static elapsedTimer _t_fixupSpills;
+
+// Subtimers for _t_output
+ static elapsedTimer _t_instrSched;
+ static elapsedTimer _t_buildOopMaps;
+#endif
+public:
+ Compile * C;
+ Phase( PhaseNumber pnum );
+#ifndef PRODUCT
+ static void print_timers();
+#endif
+};
diff --git a/src/share/vm/opto/phaseX.cpp b/src/share/vm/opto/phaseX.cpp
new file mode 100644
index 000000000..3c5cd2cca
--- /dev/null
+++ b/src/share/vm/opto/phaseX.cpp
@@ -0,0 +1,1758 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_phaseX.cpp.incl"
+
+//=============================================================================
+#define NODE_HASH_MINIMUM_SIZE 255
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(uint est_max_size) :
+ _max( round_up(est_max_size < NODE_HASH_MINIMUM_SIZE ? NODE_HASH_MINIMUM_SIZE : est_max_size) ),
+ _a(Thread::current()->resource_area()),
+ _table( NEW_ARENA_ARRAY( _a , Node* , _max ) ), // (Node**)_a->Amalloc(_max * sizeof(Node*)) ),
+ _inserts(0), _insert_limit( insert_limit() ),
+ _look_probes(0), _lookup_hits(0), _lookup_misses(0),
+ _total_insert_probes(0), _total_inserts(0),
+ _insert_probes(0), _grows(0) {
+ // _sentinel must be in the current node space
+ _sentinel = new (Compile::current(), 1) ProjNode(NULL, TypeFunc::Control);
+ memset(_table,0,sizeof(Node*)*_max);
+}
+
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(Arena *arena, uint est_max_size) :
+ _max( round_up(est_max_size < NODE_HASH_MINIMUM_SIZE ? NODE_HASH_MINIMUM_SIZE : est_max_size) ),
+ _a(arena),
+ _table( NEW_ARENA_ARRAY( _a , Node* , _max ) ),
+ _inserts(0), _insert_limit( insert_limit() ),
+ _look_probes(0), _lookup_hits(0), _lookup_misses(0),
+ _delete_probes(0), _delete_hits(0), _delete_misses(0),
+ _total_insert_probes(0), _total_inserts(0),
+ _insert_probes(0), _grows(0) {
+ // _sentinel must be in the current node space
+ _sentinel = new (Compile::current(), 1) ProjNode(NULL, TypeFunc::Control);
+ memset(_table,0,sizeof(Node*)*_max);
+}
+
+//------------------------------NodeHash---------------------------------------
+NodeHash::NodeHash(NodeHash *nh) {
+ debug_only(_table = (Node**)badAddress); // interact correctly w/ operator=
+ // just copy in all the fields
+ *this = *nh;
+ // nh->_sentinel must be in the current node space
+}
+
+//------------------------------hash_find--------------------------------------
+// Find in hash table
+Node *NodeHash::hash_find( const Node *n ) {
+ // ((Node*)n)->set_hash( n->hash() );
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _lookup_misses++ );
+ return NULL;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+ debug_only( _look_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ return NULL; // Miss!
+ }
+
+ int op = n->Opcode();
+ uint req = n->req();
+ while( 1 ) { // While probing hash table
+ if( k->req() == req && // Same count of inputs
+ k->Opcode() == op ) { // Same Opcode
+ for( uint i=0; i<req; i++ )
+ if( n->in(i)!=k->in(i)) // Different inputs?
+ goto collision; // "goto" is a speed hack...
+ if( n->cmp(*k) ) { // Check for any special bits
+ debug_only( _lookup_hits++ );
+ return k; // Hit!
+ }
+ }
+ collision:
+ debug_only( _look_probes++ );
+ key = (key + stride/*7*/) & (_max-1); // Stride through table with relative prime
+ k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ return NULL; // Miss!
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
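+
+// Worked example of the probe sequence above (illustrative numbers only; a
+// real table has at least NODE_HASH_MINIMUM_SIZE slots). With _max == 16 and
+// a node hash of 42:
+//   key    = 42 & 15 = 10
+//   stride = 10 | 1  = 11     // forced odd, hence relatively prime to _max
+//   probes: 10, (10+11)&15 = 5, (5+11)&15 = 0, (0+11)&15 = 11, ...
+// Because the stride is odd and _max is a power of two, the sequence visits
+// every slot before repeating, so an empty slot always terminates a miss.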
+
+//------------------------------hash_find_insert-------------------------------
+// Find in hash table, insert if not already present
+// Used to preserve unique entries in hash table
+Node *NodeHash::hash_find_insert( Node *n ) {
+ // n->set_hash( );
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _lookup_misses++ );
+ return NULL;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01; // stride must be relatively prime to table size
+ uint first_sentinel = 0; // replace a sentinel if seen.
+ debug_only( _look_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ check_grow(); // Grow table if insert hit limit
+ return NULL; // Miss!
+ }
+ else if( k == _sentinel ) {
+ first_sentinel = key; // Can insert here
+ }
+
+ int op = n->Opcode();
+ uint req = n->req();
+ while( 1 ) { // While probing hash table
+ if( k->req() == req && // Same count of inputs
+ k->Opcode() == op ) { // Same Opcode
+ for( uint i=0; i<req; i++ )
+ if( n->in(i)!=k->in(i)) // Different inputs?
+ goto collision; // "goto" is a speed hack...
+ if( n->cmp(*k) ) { // Check for any special bits
+ debug_only( _lookup_hits++ );
+ return k; // Hit!
+ }
+ }
+ collision:
+ debug_only( _look_probes++ );
+ key = (key + stride) & (_max-1); // Stride through table w/ relative prime
+ k = _table[key]; // Get hashed value
+ if( !k ) { // ?Miss?
+ debug_only( _lookup_misses++ );
+ key = (first_sentinel == 0) ? key : first_sentinel; // ?saw sentinel?
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ check_grow(); // Grow table if insert hit limit
+ return NULL; // Miss!
+ }
+ else if( first_sentinel == 0 && k == _sentinel ) {
+ first_sentinel = key; // Can insert here
+ }
+
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
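+
+// Illustrative scenario for the sentinel handling above (slot numbers made
+// up): suppose the probe sequence for node n visits slots 10, 5 and 0, which
+// hold { some other node, _sentinel, NULL }. Slot 5 was left by a previous
+// hash_delete(), so first_sentinel remembers it; when the empty slot 0 is
+// reached (a miss), n is stored into slot 5 instead, which keeps later
+// lookup chains short.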
+
+//------------------------------hash_insert------------------------------------
+// Insert into hash table
+void NodeHash::hash_insert( Node *n ) {
+ // // "conflict" comments -- print nodes that conflict
+ // bool conflict = false;
+ // n->set_hash();
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ return;
+ }
+ check_grow();
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+
+ while( 1 ) { // While probing hash table
+ debug_only( _insert_probes++ );
+ Node *k = _table[key]; // Get hashed value
+ if( !k || (k == _sentinel) ) break; // Found a slot
+ assert( k != n, "already inserted" );
+ // if( PrintCompilation && PrintOptoStatistics && Verbose ) { tty->print(" conflict: "); k->dump(); conflict = true; }
+ key = (key + stride) & (_max-1); // Stride through table w/ relative prime
+ }
+ _table[key] = n; // Insert into table!
+ debug_only(n->enter_hash_lock()); // Lock down the node while in the table.
+ // if( conflict ) { n->dump(); }
+}
+
+//------------------------------hash_delete------------------------------------
+// Replace in hash table with sentinel
+bool NodeHash::hash_delete( const Node *n ) {
+ Node *k;
+ uint hash = n->hash();
+ if (hash == Node::NO_HASH) {
+ debug_only( _delete_misses++ );
+ return false;
+ }
+ uint key = hash & (_max-1);
+ uint stride = key | 0x01;
+ debug_only( uint counter = 0; );
+ for( ; /* (k != NULL) && (k != _sentinel) */; ) {
+ debug_only( counter++ );
+ debug_only( _delete_probes++ );
+ k = _table[key]; // Get hashed value
+ if( !k ) { // Miss?
+ debug_only( _delete_misses++ );
+#ifdef ASSERT
+ if( VerifyOpto ) {
+ for( uint i=0; i < _max; i++ )
+ assert( _table[i] != n, "changed edges with rehashing" );
+ }
+#endif
+ return false; // Miss! Not in chain
+ }
+ else if( n == k ) {
+ debug_only( _delete_hits++ );
+ _table[key] = _sentinel; // Hit! Label as deleted entry
+ debug_only(((Node*)n)->exit_hash_lock()); // Unlock the node upon removal from table.
+ return true;
+ }
+ else {
+ // collision: move through table with prime offset
+ key = (key + stride/*7*/) & (_max-1);
+ assert( counter <= _insert_limit, "Cycle in hash-table");
+ }
+ }
+ ShouldNotReachHere();
+ return false;
+}
+
+//------------------------------round_up---------------------------------------
+// Round up to nearest power of 2
+uint NodeHash::round_up( uint x ) {
+ x += (x>>2); // Add 25% slop
+ if( x <16 ) return 16; // Small stuff
+ uint i=16;
+ while( i < x ) i <<= 1; // Double to fit
+ return i; // Return hash table size
+}
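+
+// Example (illustrative): round_up(100) adds 25% slop giving 125, doubles
+// 16 -> 32 -> 64 -> 128 and returns 128; round_up(255) gives 318 and returns
+// 512. Since the returned size is at least 1.25x the request, the table
+// starts out at most 80% full.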
+
+//------------------------------grow-------------------------------------------
+// Grow _table to next power of 2 and insert old entries
+void NodeHash::grow() {
+ // Record old state
+ uint old_max = _max;
+ Node **old_table = _table;
+ // Construct new table with twice the space
+ _grows++;
+ _total_inserts += _inserts;
+ _total_insert_probes += _insert_probes;
+ _inserts = 0;
+ _insert_probes = 0;
+ _max = _max << 1;
+ _table = NEW_ARENA_ARRAY( _a , Node* , _max ); // (Node**)_a->Amalloc( _max * sizeof(Node*) );
+ memset(_table,0,sizeof(Node*)*_max);
+ _insert_limit = insert_limit();
+ // Insert old entries into the new table
+ for( uint i = 0; i < old_max; i++ ) {
+ Node *m = *old_table++;
+ if( !m || m == _sentinel ) continue;
+ debug_only(m->exit_hash_lock()); // Unlock the node upon removal from old table.
+ hash_insert(m);
+ }
+}
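+
+// Example (illustrative): growing a 512-entry table doubles _max to 1024 and
+// re-inserts the surviving entries; node hashes do not change, only the slot
+// computation hash & (_max-1) uses the wider mask. Sentinel markers left by
+// earlier deletions are skipped rather than copied, so growing also compacts
+// the probe chains.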
+
+//------------------------------clear------------------------------------------
+// Clear all entries in _table to NULL but keep storage
+void NodeHash::clear() {
+#ifdef ASSERT
+ // Unlock all nodes upon removal from table.
+ for (uint i = 0; i < _max; i++) {
+ Node* n = _table[i];
+ if (!n || n == _sentinel) continue;
+ n->exit_hash_lock();
+ }
+#endif
+
+ memset( _table, 0, _max * sizeof(Node*) );
+}
+
+//-----------------------remove_useless_nodes----------------------------------
+// Remove useless nodes from value table,
+// implementation does not depend on hash function
+void NodeHash::remove_useless_nodes(VectorSet &useful) {
+
+ // Dead nodes in the hash table inherited from GVN should not replace
+ // existing nodes, so remove them.
+ uint max = size();
+ Node *sentinel_node = sentinel();
+ for( uint i = 0; i < max; ++i ) {
+ Node *n = at(i);
+ if(n != NULL && n != sentinel_node && !useful.test(n->_idx)) {
+ debug_only(n->exit_hash_lock()); // Unlock the node when removed
+ _table[i] = sentinel_node; // Replace with placeholder
+ }
+ }
+}
+
+#ifndef PRODUCT
+//------------------------------dump-------------------------------------------
+// Dump statistics for the hash table
+void NodeHash::dump() {
+ _total_inserts += _inserts;
+ _total_insert_probes += _insert_probes;
+ if( PrintCompilation && PrintOptoStatistics && Verbose && (_inserts > 0) ) { // PrintOptoGVN
+ if( PrintCompilation2 ) {
+ for( uint i=0; i<_max; i++ )
+ if( _table[i] )
+ tty->print("%d/%d/%d ",i,_table[i]->hash()&(_max-1),_table[i]->_idx);
+ }
+ tty->print("\nGVN Hash stats: %d grows to %d max_size\n", _grows, _max);
+ tty->print(" %d/%d (%8.1f%% full)\n", _inserts, _max, (double)_inserts/_max*100.0);
+ tty->print(" %dp/(%dh+%dm) (%8.2f probes/lookup)\n", _look_probes, _lookup_hits, _lookup_misses, (double)_look_probes/(_lookup_hits+_lookup_misses));
+ tty->print(" %dp/%di (%8.2f probes/insert)\n", _total_insert_probes, _total_inserts, (double)_total_insert_probes/_total_inserts);
+ // sentinels increase lookup cost, but not insert cost
+ assert((_lookup_misses+_lookup_hits)*4+100 >= _look_probes, "bad hash function");
+ assert( _inserts+(_inserts>>3) < _max, "table too full" );
+ assert( _inserts*3+100 >= _insert_probes, "bad hash function" );
+ }
+}
+
+Node *NodeHash::find_index(uint idx) { // For debugging
+ // Find an entry by its index value
+ for( uint i = 0; i < _max; i++ ) {
+ Node *m = _table[i];
+ if( !m || m == _sentinel ) continue;
+ if( m->_idx == (uint)idx ) return m;
+ }
+ return NULL;
+}
+#endif
+
+#ifdef ASSERT
+NodeHash::~NodeHash() {
+ // Unlock all nodes upon destruction of table.
+ if (_table != (Node**)badAddress) clear();
+}
+
+void NodeHash::operator=(const NodeHash& nh) {
+ // Unlock all nodes upon replacement of table.
+ if (&nh == this) return;
+ if (_table != (Node**)badAddress) clear();
+ memcpy(this, &nh, sizeof(*this));
+ // Do not increment hash_lock counts again.
+ // Instead, be sure we never again use the source table.
+ ((NodeHash*)&nh)->_table = (Node**)badAddress;
+}
+
+
+#endif
+
+
+//=============================================================================
+//------------------------------PhaseRemoveUseless-----------------------------
+// 1) Use a breadth-first walk to collect useful nodes reachable from root.
+PhaseRemoveUseless::PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist ) : Phase(Remove_Useless),
+ _useful(Thread::current()->resource_area()) {
+
+ // Implementation requires 'UseLoopSafepoints == true' and an edge from root
+ // to each SafePointNode at a backward branch. Inserted in add_safepoint().
+ if( !UseLoopSafepoints || !OptoRemoveUseless ) return;
+
+ // Identify nodes that are reachable from below, useful.
+ C->identify_useful_nodes(_useful);
+
+ // Remove all useless nodes from PhaseValues' recorded types
+ // Must be done before disconnecting nodes to preserve hash-table-invariant
+ gvn->remove_useless_nodes(_useful.member_set());
+
+ // Remove all useless nodes from future worklist
+ worklist->remove_useless_nodes(_useful.member_set());
+
+ // Disconnect 'useless' nodes that are adjacent to useful nodes
+ C->remove_useless_nodes(_useful);
+
+ // Remove edges from "root" to each SafePoint at a backward branch.
+ // They were inserted during parsing (see add_safepoint()) to make infinite
+ // loops without calls or exceptions visible to root, i.e., useful.
+ Node *root = C->root();
+ if( root != NULL ) {
+ for( uint i = root->req(); i < root->len(); ++i ) {
+ Node *n = root->in(i);
+ if( n != NULL && n->is_SafePoint() ) {
+ root->rm_prec(i);
+ --i;
+ }
+ }
+ }
+}
+
+
+//=============================================================================
+//------------------------------PhaseTransform---------------------------------
+PhaseTransform::PhaseTransform( PhaseNumber pnum ) : Phase(pnum),
+ _arena(Thread::current()->resource_area()),
+ _nodes(_arena),
+ _types(_arena)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+ // Force allocation for currently existing nodes
+ _types.map(C->unique(), NULL);
+}
+
+//------------------------------PhaseTransform---------------------------------
+PhaseTransform::PhaseTransform( Arena *arena, PhaseNumber pnum ) : Phase(pnum),
+ _arena(arena),
+ _nodes(arena),
+ _types(arena)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+ // Force allocation for currently existing nodes
+ _types.map(C->unique(), NULL);
+}
+
+//------------------------------PhaseTransform---------------------------------
+// Initialize with previously generated type information
+PhaseTransform::PhaseTransform( PhaseTransform *pt, PhaseNumber pnum ) : Phase(pnum),
+ _arena(pt->_arena),
+ _nodes(pt->_nodes),
+ _types(pt->_types)
+{
+ init_con_caches();
+#ifndef PRODUCT
+ clear_progress();
+ clear_transforms();
+ set_allow_progress(true);
+#endif
+}
+
+void PhaseTransform::init_con_caches() {
+ memset(_icons,0,sizeof(_icons));
+ memset(_lcons,0,sizeof(_lcons));
+ memset(_zcons,0,sizeof(_zcons));
+}
+
+
+//--------------------------------find_int_type--------------------------------
+const TypeInt* PhaseTransform::find_int_type(Node* n) {
+ if (n == NULL) return NULL;
+ // Call type_or_null(n) to determine the node's type, since we might be in
+ // the parse phase and a call to n->Value() may return the wrong type.
+ // (For example, a phi node at the beginning of loop parsing is not ready.)
+ const Type* t = type_or_null(n);
+ if (t == NULL) return NULL;
+ return t->isa_int();
+}
+
+
+//-------------------------------find_long_type--------------------------------
+const TypeLong* PhaseTransform::find_long_type(Node* n) {
+ if (n == NULL) return NULL;
+ // (See comment above on type_or_null.)
+ const Type* t = type_or_null(n);
+ if (t == NULL) return NULL;
+ return t->isa_long();
+}
+
+
+#ifndef PRODUCT
+void PhaseTransform::dump_old2new_map() const {
+ _nodes.dump();
+}
+
+void PhaseTransform::dump_new( uint nidx ) const {
+ for( uint i=0; i<_nodes.Size(); i++ )
+ if( _nodes[i] && _nodes[i]->_idx == nidx ) {
+ _nodes[i]->dump();
+ tty->cr();
+ tty->print_cr("Old index= %d",i);
+ return;
+ }
+ tty->print_cr("Node %d not found in the new indices", nidx);
+}
+
+//------------------------------dump_types-------------------------------------
+void PhaseTransform::dump_types( ) const {
+ _types.dump();
+}
+
+//------------------------------dump_nodes_and_types---------------------------
+void PhaseTransform::dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl) {
+ VectorSet visited(Thread::current()->resource_area());
+ dump_nodes_and_types_recur( root, depth, only_ctrl, visited );
+}
+
+//------------------------------dump_nodes_and_types_recur---------------------
+void PhaseTransform::dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited) {
+ if( !n ) return;
+ if( depth == 0 ) return;
+ if( visited.test_set(n->_idx) ) return;
+ for( uint i=0; i<n->len(); i++ ) {
+ if( only_ctrl && !(n->is_Region()) && i != TypeFunc::Control ) continue;
+ dump_nodes_and_types_recur( n->in(i), depth-1, only_ctrl, visited );
+ }
+ n->dump();
+ if (type_or_null(n) != NULL) {
+ tty->print(" "); type(n)->dump(); tty->cr();
+ }
+}
+
+#endif
+
+
+//=============================================================================
+//------------------------------PhaseValues------------------------------------
+// Set minimum table size to "255"
+PhaseValues::PhaseValues( Arena *arena, uint est_max_size ) : PhaseTransform(arena, GVN), _table(arena, est_max_size) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------PhaseValues------------------------------------
+// Set minimum table size to "255"
+PhaseValues::PhaseValues( PhaseValues *ptv ) : PhaseTransform( ptv, GVN ),
+ _table(&ptv->_table) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------PhaseValues------------------------------------
+// Used by +VerifyOpto. Clear out hash table but copy _types array.
+PhaseValues::PhaseValues( PhaseValues *ptv, const char *dummy ) : PhaseTransform( ptv, GVN ),
+ _table(ptv->arena(),ptv->_table.size()) {
+ NOT_PRODUCT( clear_new_values(); )
+}
+
+//------------------------------~PhaseValues-----------------------------------
+#ifndef PRODUCT
+PhaseValues::~PhaseValues() {
+ _table.dump();
+
+ // Statistics for value progress and efficiency
+ if( PrintCompilation && Verbose && WizardMode ) {
+ tty->print("\n%sValues: %d nodes ---> %d/%d (%d)",
+ is_IterGVN() ? "Iter" : " ", C->unique(), made_progress(), made_transforms(), made_new_values());
+ if( made_transforms() != 0 ) {
+ tty->print_cr(" ratio %f", made_progress()/(float)made_transforms() );
+ } else {
+ tty->cr();
+ }
+ }
+}
+#endif
+
+//------------------------------makecon----------------------------------------
+ConNode* PhaseTransform::makecon(const Type *t) {
+ assert(t->singleton(), "must be a constant");
+ assert(!t->empty() || t == Type::TOP, "must not be vacuous range");
+ switch (t->base()) { // fast paths
+ case Type::Half:
+ case Type::Top: return (ConNode*) C->top();
+ case Type::Int: return intcon( t->is_int()->get_con() );
+ case Type::Long: return longcon( t->is_long()->get_con() );
+ }
+ if (t->is_zero_type())
+ return zerocon(t->basic_type());
+ return uncached_makecon(t);
+}
+
+//--------------------------uncached_makecon-----------------------------------
+// Make an idealized constant - one of ConINode, ConPNode, etc.
+ConNode* PhaseValues::uncached_makecon(const Type *t) {
+ assert(t->singleton(), "must be a constant");
+ ConNode* x = ConNode::make(C, t);
+ ConNode* k = (ConNode*)hash_find_insert(x); // Value numbering
+ if (k == NULL) {
+ set_type(x, t); // Missed, provide type mapping
+ GrowableArray<Node_Notes*>* nna = C->node_note_array();
+ if (nna != NULL) {
+ Node_Notes* loc = C->locate_node_notes(nna, x->_idx, true);
+ loc->clear(); // do not put debug info on constants
+ }
+ // Collect points-to information for escape analysis
+ ConnectionGraph *cgr = C->congraph();
+ if (cgr != NULL) {
+ cgr->record_escape(x, this);
+ }
+ } else {
+ x->destruct(); // Hit, destroy duplicate constant
+ x = k; // use existing constant
+ }
+ return x;
+}
+
+//------------------------------intcon-----------------------------------------
+// Fast integer constant. Same as "transform(new ConINode(TypeInt::make(i)))"
+ConINode* PhaseTransform::intcon(int i) {
+ // Small integer? Check cache! Check that cached node is not dead
+ if (i >= _icon_min && i <= _icon_max) {
+ ConINode* icon = _icons[i-_icon_min];
+ if (icon != NULL && icon->in(TypeFunc::Control) != NULL)
+ return icon;
+ }
+ ConINode* icon = (ConINode*) uncached_makecon(TypeInt::make(i));
+ assert(icon->is_Con(), "");
+ if (i >= _icon_min && i <= _icon_max)
+ _icons[i-_icon_min] = icon; // Cache small integers
+ return icon;
+}
+
+//------------------------------longcon----------------------------------------
+// Fast long constant.
+ConLNode* PhaseTransform::longcon(jlong l) {
+ // Small integer? Check cache! Check that cached node is not dead
+ if (l >= _lcon_min && l <= _lcon_max) {
+ ConLNode* lcon = _lcons[l-_lcon_min];
+ if (lcon != NULL && lcon->in(TypeFunc::Control) != NULL)
+ return lcon;
+ }
+ ConLNode* lcon = (ConLNode*) uncached_makecon(TypeLong::make(l));
+ assert(lcon->is_Con(), "");
+ if (l >= _lcon_min && l <= _lcon_max)
+ _lcons[l-_lcon_min] = lcon; // Cache small integers
+ return lcon;
+}
+
+//------------------------------zerocon-----------------------------------------
+// Fast zero or null constant. Same as "transform(ConNode::make(Type::get_zero_type(bt)))"
+ConNode* PhaseTransform::zerocon(BasicType bt) {
+ assert((uint)bt <= _zcon_max, "domain check");
+ ConNode* zcon = _zcons[bt];
+ if (zcon != NULL && zcon->in(TypeFunc::Control) != NULL)
+ return zcon;
+ zcon = (ConNode*) uncached_makecon(Type::get_zero_type(bt));
+ _zcons[bt] = zcon;
+ return zcon;
+}
+
+
+
+//=============================================================================
+//------------------------------transform--------------------------------------
+// Return a node which computes the same function as this node, but in a
+// faster or cheaper fashion. The Node passed in here must have no other
+// pointers to it, as its storage will be reclaimed if the Node can be
+// optimized away.
+Node *PhaseGVN::transform( Node *n ) {
+ NOT_PRODUCT( set_transforms(); )
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ NOT_PRODUCT( uint loop_count = 0; )
+ while( 1 ) {
+ Node *i = k->Ideal(this, /*can_reshape=*/false);
+ if( !i ) break;
+ assert( i->_idx >= k->_idx, "Idealize should return new nodes, use Identity to return old nodes" );
+ // Can never reclaim storage for Ideal calls, because the Ideal call
+ // returns a new Node, bumping the High Water Mark and our old Node
+ // is caught behind the new one.
+ //if( k != i ) {
+ //k->destruct(); // Reclaim storage for recent node
+ k = i;
+ //}
+ assert(loop_count++ < K, "infinite loop in PhaseGVN::transform");
+ }
+ NOT_PRODUCT( if( loop_count != 0 ) { set_progress(); } )
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // Cache result of Value call since it can be expensive
+ // (abstract interpretation of node 'k' using phase->_types[ inputs ])
+ const Type *t = k->Value(this); // Get runtime Value set
+ assert(t != NULL, "value sanity");
+ if (type_or_null(k) != t) {
+#ifndef PRODUCT
+ // Do not record transformation or value construction on first visit
+ if (type_or_null(k) == NULL) {
+ inc_new_values();
+ set_progress();
+ }
+#endif
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ }
+
+ if( t->singleton() && !k->is_Con() ) {
+ //k->destruct(); // Reclaim storage for recent node
+ NOT_PRODUCT( set_progress(); )
+ return makecon(t); // Turn into a constant
+ }
+
+ // Now check for Identities
+ Node *i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ //k->destruct(); // Reclaim storage for recent node
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Try Global Value Numbering
+ i = hash_find_insert(k); // Found older value when i != NULL
+ if( i && i != k ) { // Hit? Return the old guy
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Collect points-to information for escape analysis
+ ConnectionGraph *cgr = C->congraph();
+ if (cgr != NULL) {
+ cgr->record_escape(k, this);
+ }
+
+ // Return Idealized original
+ return k;
+}
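+
+// Typical call pattern (illustrative; the allocation form mirrors the node
+// creation style used elsewhere in this file):
+//   Node *sum = gvn.transform( new (C, 3) AddINode(a, b) );
+// The caller must use the returned node: the freshly built node may be
+// replaced by an idealized form, a constant, an identity, or an older
+// hash-equivalent node already in the value table.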
+
+//------------------------------transform--------------------------------------
+// Return a node which computes the same function as this node, but
+// in a faster or cheaper fashion.
+Node *PhaseGVN::transform_no_reclaim( Node *n ) {
+ NOT_PRODUCT( set_transforms(); )
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ NOT_PRODUCT( uint loop_count = 0; )
+ while( 1 ) {
+ Node *i = k->Ideal(this, /*can_reshape=*/false);
+ if( !i ) break;
+ assert( i->_idx >= k->_idx, "Idealize should return new nodes, use Identity to return old nodes" );
+ k = i;
+ assert(loop_count++ < K, "infinite loop in PhaseGVN::transform");
+ }
+ NOT_PRODUCT( if( loop_count != 0 ) { set_progress(); } )
+
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // Since I just called 'Value' to compute the set of run-time values
+ // for this Node, and 'Value' is non-local (and therefore expensive) I'll
+ // cache Value. Later requests for the local phase->type of this Node can
+ // use the cached Value instead of suffering with 'bottom_type'.
+ const Type *t = k->Value(this); // Get runtime Value set
+ assert(t != NULL, "value sanity");
+ if (type_or_null(k) != t) {
+#ifndef PRODUCT
+ // Do not count initial visit to node as a transformation
+ if (type_or_null(k) == NULL) {
+ inc_new_values();
+ set_progress();
+ }
+#endif
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ }
+
+ if( t->singleton() && !k->is_Con() ) {
+ NOT_PRODUCT( set_progress(); )
+ return makecon(t); // Turn into a constant
+ }
+
+ // Now check for Identities
+ Node *i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Global Value Numbering
+ i = hash_find_insert(k); // Insert if new
+ if( i && (i != k) ) {
+ // Return the pre-existing node
+ NOT_PRODUCT( set_progress(); )
+ return i;
+ }
+
+ // Return Idealized original
+ return k;
+}
+
+#ifdef ASSERT
+//------------------------------dead_loop_check--------------------------------
+// Check for a simple dead loop when a data node references itself directly
+// or through another data node, excluding cons and phis.
+void PhaseGVN::dead_loop_check( Node *n ) {
+ // Phi may reference itself in a loop
+ if (n != NULL && !n->is_dead_loop_safe() && !n->is_CFG()) {
+ // Do 2 levels check and only data inputs.
+ bool no_dead_loop = true;
+ uint cnt = n->req();
+ for (uint i = 1; i < cnt && no_dead_loop; i++) {
+ Node *in = n->in(i);
+ if (in == n) {
+ no_dead_loop = false;
+ } else if (in != NULL && !in->is_dead_loop_safe()) {
+ uint icnt = in->req();
+ for (uint j = 1; j < icnt && no_dead_loop; j++) {
+ if (in->in(j) == n || in->in(j) == in)
+ no_dead_loop = false;
+ }
+ }
+ }
+ if (!no_dead_loop) n->dump(3);
+ assert(no_dead_loop, "dead loop detected");
+ }
+}
+#endif
+
+//=============================================================================
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize hash table to fresh and clean for +VerifyOpto
+PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ) : PhaseGVN(igvn,dummy), _worklist( ) {
+}
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize with previous PhaseIterGVN info; used by PhaseCCP
+PhaseIterGVN::PhaseIterGVN( PhaseIterGVN *igvn ) : PhaseGVN(igvn),
+ _worklist( igvn->_worklist )
+{
+}
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Initialize with previous PhaseGVN info from Parser
+PhaseIterGVN::PhaseIterGVN( PhaseGVN *gvn ) : PhaseGVN(gvn),
+ _worklist(*C->for_igvn())
+{
+ uint max;
+
+ // Dead nodes in the hash table inherited from GVN were not treated as
+ // roots during def-use info creation; hence they represent an invisible
+ // use. Clear them out.
+ max = _table.size();
+ for( uint i = 0; i < max; ++i ) {
+ Node *n = _table.at(i);
+ if(n != NULL && n != _table.sentinel() && n->outcnt() == 0) {
+ if( n->is_top() ) continue;
+ assert( false, "Parse::remove_useless_nodes missed this node");
+ hash_delete(n);
+ }
+ }
+
+ // Any Phis or Regions on the worklist probably had uses that could not
+ // make more progress because the uses were made while the Phis and Regions
+ // were in half-built states. Put all uses of Phis and Regions on worklist.
+ max = _worklist.size();
+ for( uint j = 0; j < max; j++ ) {
+ Node *n = _worklist.at(j);
+ uint uop = n->Opcode();
+ if( uop == Op_Phi || uop == Op_Region ||
+ n->is_Type() ||
+ n->is_Mem() )
+ add_users_to_worklist(n);
+ }
+}
+
+
+#ifndef PRODUCT
+void PhaseIterGVN::verify_step(Node* n) {
+ _verify_window[_verify_counter % _verify_window_size] = n;
+ ++_verify_counter;
+ ResourceMark rm;
+ ResourceArea *area = Thread::current()->resource_area();
+ VectorSet old_space(area), new_space(area);
+ if (C->unique() < 1000 ||
+ 0 == _verify_counter % (C->unique() < 10000 ? 10 : 100)) {
+ ++_verify_full_passes;
+ Node::verify_recur(C->root(), -1, old_space, new_space);
+ }
+ const int verify_depth = 4;
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ Node* n = _verify_window[i];
+ if ( n == NULL ) continue;
+ if( n->in(0) == NodeSentinel ) { // xform_idom
+ _verify_window[i] = n->in(1);
+ --i; continue;
+ }
+ // Typical fanout is 1-2, so this call visits about 6 nodes.
+ Node::verify_recur(n, verify_depth, old_space, new_space);
+ }
+}
+#endif
+
+
+//------------------------------init_worklist----------------------------------
+// Initialize worklist for each node.
+void PhaseIterGVN::init_worklist( Node *n ) {
+ if( _worklist.member(n) ) return;
+ _worklist.push(n);
+ uint cnt = n->req();
+ for( uint i =0 ; i < cnt; i++ ) {
+ Node *m = n->in(i);
+ if( m ) init_worklist(m);
+ }
+}
+
+//------------------------------optimize---------------------------------------
+void PhaseIterGVN::optimize() {
+ debug_only(uint num_processed = 0;);
+#ifndef PRODUCT
+ {
+ _verify_counter = 0;
+ _verify_full_passes = 0;
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ _verify_window[i] = NULL;
+ }
+ }
+#endif
+
+ // Pull from worklist; transform node;
+ // If node has changed: update edge info and put uses on worklist.
+ while( _worklist.size() ) {
+ Node *n = _worklist.pop();
+ if (TraceIterativeGVN && Verbose) {
+ tty->print(" Pop ");
+ NOT_PRODUCT( n->dump(); )
+ debug_only(if( (num_processed++ % 100) == 0 ) _worklist.print_set();)
+ }
+
+ if (n->outcnt() != 0) {
+
+#ifndef PRODUCT
+ uint wlsize = _worklist.size();
+ const Type* oldtype = type_or_null(n);
+#endif //PRODUCT
+
+ Node *nn = transform_old(n);
+
+#ifndef PRODUCT
+ if (TraceIterativeGVN) {
+ const Type* newtype = type_or_null(n);
+ if (nn != n) {
+ // print old node
+ tty->print("< ");
+ if (oldtype != newtype && oldtype != NULL) {
+ oldtype->dump();
+ }
+ do { tty->print("\t"); } while (tty->position() < 16);
+ tty->print("<");
+ n->dump();
+ }
+ if (oldtype != newtype || nn != n) {
+ // print new node and/or new type
+ if (oldtype == NULL) {
+ tty->print("* ");
+ } else if (nn != n) {
+ tty->print("> ");
+ } else {
+ tty->print("= ");
+ }
+ if (newtype == NULL) {
+ tty->print("null");
+ } else {
+ newtype->dump();
+ }
+ do { tty->print("\t"); } while (tty->position() < 16);
+ nn->dump();
+ }
+ if (Verbose && wlsize < _worklist.size()) {
+ tty->print(" Push {");
+ while (wlsize != _worklist.size()) {
+ Node* pushed = _worklist.at(wlsize++);
+ tty->print(" %d", pushed->_idx);
+ }
+ tty->print_cr(" }");
+ }
+ }
+ if( VerifyIterativeGVN && nn != n ) {
+ verify_step((Node*) NULL); // ignore n, it might be subsumed
+ }
+#endif
+ } else if (!n->is_top()) {
+ remove_dead_node(n);
+ }
+ }
+
+#ifndef PRODUCT
+ C->verify_graph_edges();
+ if( VerifyOpto && allow_progress() ) {
+ // Must turn off allow_progress to enable assert and break recursion
+ C->root()->verify();
+ { // Check if any progress was missed using IterGVN
+ // Def-Use info enables transformations not attempted in wash-pass
+ // e.g. Region/Phi cleanup, ...
+ // Null-check elision -- may not have reached fixpoint
+ // do not propagate to dominated nodes
+ ResourceMark rm;
+ PhaseIterGVN igvn2(this,"Verify"); // Fresh and clean!
+ // Fill worklist completely
+ igvn2.init_worklist(C->root());
+
+ igvn2.set_allow_progress(false);
+ igvn2.optimize();
+ igvn2.set_allow_progress(true);
+ }
+ }
+ if ( VerifyIterativeGVN && PrintOpto ) {
+ if ( _verify_counter == _verify_full_passes )
+ tty->print_cr("VerifyIterativeGVN: %d transforms and verify passes",
+ _verify_full_passes);
+ else
+ tty->print_cr("VerifyIterativeGVN: %d transforms, %d full verify passes",
+ _verify_counter, _verify_full_passes);
+ }
+#endif
+}
+
+
+//------------------register_new_node_with_optimizer---------------------------
+// Register a new node with the optimizer. Update the types array, the def-use
+// info. Put on worklist.
+Node* PhaseIterGVN::register_new_node_with_optimizer(Node* n, Node* orig) {
+ set_type_bottom(n);
+ _worklist.push(n);
+ if (orig != NULL) C->copy_node_notes_to(n, orig);
+ return n;
+}
+
+//------------------------------transform--------------------------------------
+// Non-recursive: idealize Node 'n' with respect to its inputs and its value
+Node *PhaseIterGVN::transform( Node *n ) {
+ // If brand new node, make space in type array, and give it a type.
+ ensure_type_or_null(n);
+ if (type_or_null(n) == NULL) {
+ set_type_bottom(n);
+ }
+
+ return transform_old(n);
+}
+
+//------------------------------transform_old----------------------------------
+Node *PhaseIterGVN::transform_old( Node *n ) {
+#ifndef PRODUCT
+ debug_only(uint loop_count = 0;);
+ set_transforms();
+#endif
+ // Remove 'n' from hash table in case it gets modified
+ _table.hash_delete(n);
+ if( VerifyIterativeGVN ) {
+ assert( !_table.find_index(n->_idx), "found duplicate entry in table");
+ }
+
+ // Apply the Ideal call in a loop until it no longer applies
+ Node *k = n;
+ DEBUG_ONLY(dead_loop_check(k);)
+ Node *i = k->Ideal(this, /*can_reshape=*/true);
+#ifndef PRODUCT
+ if( VerifyIterativeGVN )
+ verify_step(k);
+ if( i && VerifyOpto ) {
+ if( !allow_progress() ) {
+ if (i->is_Add() && i->outcnt() == 1) {
+ // Switched input to left side because this is the only use
+ } else if( i->is_If() && (i->in(0) == NULL) ) {
+ // This IF is dead because it is dominated by an equivalent IF. When the
+ // dominating IF changed, the info was not propagated sparsely to 'this'.
+ // Propagating this info further will spuriously identify other
+ // progress.
+ return i;
+ } else
+ set_progress();
+ } else
+ set_progress();
+ }
+#endif
+
+ while( i ) {
+#ifndef PRODUCT
+ debug_only( if( loop_count >= K ) i->dump(4); )
+ assert(loop_count < K, "infinite loop in PhaseIterGVN::transform");
+ debug_only( loop_count++; )
+#endif
+ assert((i->_idx >= k->_idx) || i->is_top(), "Idealize should return new nodes, use Identity to return old nodes");
+ // Made a change; put users of original Node on worklist
+ add_users_to_worklist( k );
+ // Replacing root of transform tree?
+ if( k != i ) {
+ // Make users of old Node now use new.
+ subsume_node( k, i );
+ k = i;
+ }
+ DEBUG_ONLY(dead_loop_check(k);)
+ // Try idealizing again
+ i = k->Ideal(this, /*can_reshape=*/true);
+#ifndef PRODUCT
+ if( VerifyIterativeGVN )
+ verify_step(k);
+ if( i && VerifyOpto ) set_progress();
+#endif
+ }
+
+ // If brand new node, make space in type array.
+ ensure_type_or_null(k);
+
+ // See what kind of values 'k' takes on at runtime
+ const Type *t = k->Value(this);
+ assert(t != NULL, "value sanity");
+
+ // Since I just called 'Value' to compute the set of run-time values
+ // for this Node, and 'Value' is non-local (and therefore expensive) I'll
+ // cache Value. Later requests for the local phase->type of this Node can
+ // use the cached Value instead of suffering with 'bottom_type'.
+ if (t != type_or_null(k)) {
+ NOT_PRODUCT( set_progress(); )
+ NOT_PRODUCT( inc_new_values();)
+ set_type(k, t);
+ // If k is a TypeNode, capture any more-precise type permanently into Node
+ k->raise_bottom_type(t);
+ // Move users of node to worklist
+ add_users_to_worklist( k );
+ }
+
+ // If 'k' computes a constant, replace it with a constant
+ if( t->singleton() && !k->is_Con() ) {
+ NOT_PRODUCT( set_progress(); )
+ Node *con = makecon(t); // Make a constant
+ add_users_to_worklist( k );
+ subsume_node( k, con ); // Everybody using k now uses con
+ return con;
+ }
+
+ // Now check for Identities
+ i = k->Identity(this); // Look for a nearby replacement
+ if( i != k ) { // Found? Return replacement!
+ NOT_PRODUCT( set_progress(); )
+ add_users_to_worklist( k );
+ subsume_node( k, i ); // Everybody using k now uses i
+ return i;
+ }
+
+ // Global Value Numbering
+ i = hash_find_insert(k); // Check for pre-existing node
+ if( i && (i != k) ) {
+ // Return the pre-existing node if it isn't dead
+ NOT_PRODUCT( set_progress(); )
+ add_users_to_worklist( k );
+ subsume_node( k, i ); // Everybody using k now uses i
+ return i;
+ }
+
+ // Return Idealized original
+ return k;
+}
+
+//---------------------------------saturate------------------------------------
+const Type* PhaseIterGVN::saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const {
+ return new_type->narrow(old_type);
+}
+
+//------------------------------remove_globally_dead_node----------------------
+// Kill a globally dead Node. All uses are also globally dead and are
+// aggressively trimmed.
+void PhaseIterGVN::remove_globally_dead_node( Node *dead ) {
+ assert(dead != C->root(), "killing root, eh?");
+ if (dead->is_top()) return;
+ NOT_PRODUCT( set_progress(); )
+ // Remove from iterative worklist
+ _worklist.remove(dead);
+ if (!dead->is_Con()) { // Don't kill cons but uses
+ // Remove from hash table
+ _table.hash_delete( dead );
+ // Smash all inputs to 'dead', isolating him completely
+ for( uint i = 0; i < dead->req(); i++ ) {
+ Node *in = dead->in(i);
+ if( in ) { // Points to something?
+ dead->set_req(i,NULL); // Kill the edge
+ if (in->outcnt() == 0 && in != C->top()) {// Made input go dead?
+ remove_dead_node(in); // Recursively remove
+ } else if (in->outcnt() == 1 &&
+ in->has_special_unique_user()) {
+ _worklist.push(in->unique_out());
+ } else if (in->outcnt() <= 2 && dead->is_Phi()) {
+ if( in->Opcode() == Op_Region )
+ _worklist.push(in);
+ else if( in->is_Store() ) {
+ DUIterator_Fast imax, i = in->fast_outs(imax);
+ _worklist.push(in->fast_out(i));
+ i++;
+ if(in->outcnt() == 2) {
+ _worklist.push(in->fast_out(i));
+ i++;
+ }
+ assert(!(i < imax), "sanity");
+ }
+ }
+ }
+ }
+
+ if (dead->is_macro()) {
+ C->remove_macro_node(dead);
+ }
+ }
+ // Aggressively kill globally dead uses
+ // (Cannot use DUIterator_Last because of the indefinite number
+ // of edge deletions per loop trip.)
+ while (dead->outcnt() > 0) {
+ remove_globally_dead_node(dead->raw_out(0));
+ }
+}
+
+//------------------------------subsume_node-----------------------------------
+// Remove users from node 'old' and add them to node 'nn'.
+void PhaseIterGVN::subsume_node( Node *old, Node *nn ) {
+ assert( old != hash_find(old), "should already have been removed" );
+ assert( old != C->top(), "cannot subsume top node");
+ // Copy debug or profile information to the new version:
+ C->copy_node_notes_to(nn, old);
+ // Move users of node 'old' to node 'nn'
+ for (DUIterator_Last imin, i = old->last_outs(imin); i >= imin; ) {
+ Node* use = old->last_out(i); // for each use...
+ // use might need re-hashing (but it won't if it's a new node)
+ bool is_in_table = _table.hash_delete( use );
+ // Update use-def info as well
+ // We remove all occurrences of old within use->in,
+ // so as to avoid rehashing any node more than once.
+ // The hash table probe swamps any outer loop overhead.
+ uint num_edges = 0;
+ for (uint jmax = use->len(), j = 0; j < jmax; j++) {
+ if (use->in(j) == old) {
+ use->set_req(j, nn);
+ ++num_edges;
+ }
+ }
+ // Insert into GVN hash table if unique
+ // If a duplicate, 'use' will be cleaned up when pulled off worklist
+ if( is_in_table ) {
+ hash_find_insert(use);
+ }
+ i -= num_edges; // we deleted 1 or more copies of this edge
+ }
+
+ // Smash all inputs to 'old', isolating him completely
+ Node *temp = new (C, 1) Node(1);
+ temp->init_req(0,nn); // Add a use to nn to prevent him from dying
+ remove_dead_node( old );
+ temp->del_req(0); // Yank bogus edge
+#ifndef PRODUCT
+ if( VerifyIterativeGVN ) {
+ for ( int i = 0; i < _verify_window_size; i++ ) {
+ if ( _verify_window[i] == old )
+ _verify_window[i] = nn;
+ }
+ }
+#endif
+ _worklist.remove(temp); // this can be necessary
+ temp->destruct(); // reuse the _idx of this little guy
+}
+
+//------------------------------add_users_to_worklist--------------------------
+void PhaseIterGVN::add_users_to_worklist0( Node *n ) {
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ _worklist.push(n->fast_out(i)); // Push on worklist
+ }
+}
+
+void PhaseIterGVN::add_users_to_worklist( Node *n ) {
+ add_users_to_worklist0(n);
+
+ // Move users of node to worklist
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* use = n->fast_out(i); // Get use
+
+ if( use->is_Multi() || // Multi-definer? Push projs on worklist
+ use->is_Store() ) // Enable store/load same address
+ add_users_to_worklist0(use);
+
+ // If we changed the receiver type to a call, we need to revisit
+ // the Catch following the call. It's looking for a non-NULL
+ // receiver to know when to enable the regular fall-through path
+ // in addition to the NullPtrException path.
+ if (use->is_CallDynamicJava() && n == use->in(TypeFunc::Parms)) {
+ Node* p = use->as_CallDynamicJava()->proj_out(TypeFunc::Control);
+ if (p != NULL) {
+ add_users_to_worklist0(p);
+ }
+ }
+
+ if( use->is_Cmp() ) { // Enable CMP/BOOL optimization
+ add_users_to_worklist(use); // Put Bool on worklist
+ // Look for the 'is_x2logic' pattern: "x ? 0 : 1" and put the
+ // phi merging either 0 or 1 onto the worklist
+ if (use->outcnt() > 0) {
+ Node* bol = use->raw_out(0);
+ if (bol->outcnt() > 0) {
+ Node* iff = bol->raw_out(0);
+ if (iff->outcnt() == 2) {
+ Node* ifproj0 = iff->raw_out(0);
+ Node* ifproj1 = iff->raw_out(1);
+ if (ifproj0->outcnt() > 0 && ifproj1->outcnt() > 0) {
+ Node* region0 = ifproj0->raw_out(0);
+ Node* region1 = ifproj1->raw_out(0);
+ if( region0 == region1 )
+ add_users_to_worklist0(region0);
+ }
+ }
+ }
+ }
+ }
+
+ uint use_op = use->Opcode();
+ // If changed Cast input, check Phi users for simple cycles
+ if( use->is_ConstraintCast() || use->Opcode() == Op_CheckCastPP ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->is_Phi())
+ _worklist.push(u);
+ }
+ }
+ // If changed LShift inputs, check RShift users for useless sign-ext
+ if( use_op == Op_LShiftI ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->Opcode() == Op_RShiftI)
+ _worklist.push(u);
+ }
+ }
+ // If changed AddP inputs, check Stores for loop invariant
+ if( use_op == Op_AddP ) {
+ for (DUIterator_Fast i2max, i2 = use->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* u = use->fast_out(i2);
+ if (u->is_Mem())
+ _worklist.push(u);
+ }
+ }
+ // If changed initialization activity, check dependent Stores
+ if (use_op == Op_Allocate || use_op == Op_AllocateArray) {
+ InitializeNode* init = use->as_Allocate()->initialization();
+ if (init != NULL) {
+ Node* imem = init->proj_out(TypeFunc::Memory);
+ if (imem != NULL) add_users_to_worklist0(imem);
+ }
+ }
+ if (use_op == Op_Initialize) {
+ Node* imem = use->as_Initialize()->proj_out(TypeFunc::Memory);
+ if (imem != NULL) add_users_to_worklist0(imem);
+ }
+ }
+}
+
+//=============================================================================
+#ifndef PRODUCT
+uint PhaseCCP::_total_invokes = 0;
+uint PhaseCCP::_total_constants = 0;
+#endif
+//------------------------------PhaseCCP---------------------------------------
+// Conditional Constant Propagation, ala Wegman & Zadeck
+PhaseCCP::PhaseCCP( PhaseIterGVN *igvn ) : PhaseIterGVN(igvn) {
+ NOT_PRODUCT( clear_constants(); )
+ assert( _worklist.size() == 0, "" );
+ // Clear out _nodes from IterGVN. Must be clear to transform call.
+ _nodes.clear(); // Clear out from IterGVN
+ analyze();
+}
+
+#ifndef PRODUCT
+//------------------------------~PhaseCCP--------------------------------------
+PhaseCCP::~PhaseCCP() {
+ inc_invokes();
+ _total_constants += count_constants();
+}
+#endif
+
+
+#ifdef ASSERT
+static bool ccp_type_widens(const Type* t, const Type* t0) {
+ assert(t->meet(t0) == t, "Not monotonic");
+ switch (t->base() == t0->base() ? t->base() : Type::Top) {
+ case Type::Int:
+ assert(t0->isa_int()->_widen <= t->isa_int()->_widen, "widen increases");
+ break;
+ case Type::Long:
+ assert(t0->isa_long()->_widen <= t->isa_long()->_widen, "widen increases");
+ break;
+ }
+ return true;
+}
+#endif //ASSERT
+
+//------------------------------analyze----------------------------------------
+void PhaseCCP::analyze() {
+ // Initialize all types to TOP, optimistic analysis
+ for (int i = C->unique() - 1; i >= 0; i--) {
+ _types.map(i,Type::TOP);
+ }
+
+ // Push root onto worklist
+ Unique_Node_List worklist;
+ worklist.push(C->root());
+
+ // Pull from worklist; compute new value; push changes out.
+ // This loop is the meat of CCP.
+ while( worklist.size() ) {
+ Node *n = worklist.pop();
+ const Type *t = n->Value(this);
+ if (t != type(n)) {
+ assert(ccp_type_widens(t, type(n)), "ccp type must widen");
+#ifndef PRODUCT
+ if( TracePhaseCCP ) {
+ t->dump();
+ do { tty->print("\t"); } while (tty->position() < 16);
+ n->dump();
+ }
+#endif
+ set_type(n, t);
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* m = n->fast_out(i); // Get user
+ if( m->is_Region() ) { // New path to Region? Must recheck Phis too
+ for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* p = m->fast_out(i2); // Propagate changes to uses
+ if( p->bottom_type() != type(p) ) // If not already bottomed out
+ worklist.push(p); // Propagate change to user
+ }
+ }
+ // If we changed the receiver type to a call, we need to revisit
+ // the Catch following the call. It's looking for a non-NULL
+ // receiver to know when to enable the regular fall-through path
+ // in addition to the NullPtrException path.
+ if (m->is_Call()) {
+ for (DUIterator_Fast i2max, i2 = m->fast_outs(i2max); i2 < i2max; i2++) {
+ Node* p = m->fast_out(i2); // Propagate changes to uses
+ if (p->is_Proj() && p->as_Proj()->_con == TypeFunc::Control && p->outcnt() == 1)
+ worklist.push(p->unique_out());
+ }
+ }
+ if( m->bottom_type() != type(m) ) // If not already bottomed out
+ worklist.push(m); // Propagate change to user
+ }
+ }
+ }
+}
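+
+// Sketch of the fixed point above (illustrative): every type starts at TOP,
+// the optimistic assumption, and can only move down the lattice because
+// Value() is monotonic (see ccp_type_widens). For example, a Phi merging the
+// constant 3 with a path still typed TOP keeps the constant type; if the
+// other path later produces 5, the Phi falls to the meet of the two constants
+// and its users go back on the worklist. Types only fall and the lattice has
+// finite depth, so the worklist eventually drains.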
+
+//------------------------------do_transform-----------------------------------
+// Top level driver for the recursive transformer
+void PhaseCCP::do_transform() {
+ // Correct leaves of new-space Nodes; they point to old-space.
+ C->set_root( transform(C->root())->as_Root() );
+ assert( C->top(), "missing TOP node" );
+ assert( C->root(), "missing root" );
+}
+
+//------------------------------transform--------------------------------------
+// Given a Node in old-space, clone him into new-space.
+// Convert any of his old-space children into new-space children.
+Node *PhaseCCP::transform( Node *n ) {
+ Node *new_node = _nodes[n->_idx]; // Check for transformed node
+ if( new_node != NULL )
+ return new_node; // Been there, done that, return old answer
+ new_node = transform_once(n); // Check for constant
+ _nodes.map( n->_idx, new_node ); // Flag as having been cloned
+
+ // Allocate stack of size _nodes.Size()/2 to avoid frequent realloc
+ GrowableArray <Node *> trstack(C->unique() >> 1);
+
+ trstack.push(new_node); // Process children of cloned node
+ while ( trstack.is_nonempty() ) {
+ Node *clone = trstack.pop();
+ uint cnt = clone->req();
+ for( uint i = 0; i < cnt; i++ ) { // For all inputs do
+ Node *input = clone->in(i);
+ if( input != NULL ) { // Ignore NULLs
+ Node *new_input = _nodes[input->_idx]; // Check for cloned input node
+ if( new_input == NULL ) {
+ new_input = transform_once(input); // Check for constant
+ _nodes.map( input->_idx, new_input );// Flag as having been cloned
+ trstack.push(new_input);
+ }
+ assert( new_input == clone->in(i), "insanity check");
+ }
+ }
+ }
+ return new_node;
+}
+
+
+//------------------------------transform_once---------------------------------
+// For PhaseCCP, transformation is IDENTITY unless Node computed a constant.
+Node *PhaseCCP::transform_once( Node *n ) {
+ const Type *t = type(n);
+ // Constant? Use constant Node instead
+ if( t->singleton() ) {
+ Node *nn = n; // Default is to return the original constant
+ if( t == Type::TOP ) {
+ // cache my top node on the Compile instance
+ if( C->cached_top_node() == NULL || C->cached_top_node()->in(0) == NULL ) {
+ C->set_cached_top_node( ConNode::make(C, Type::TOP) );
+ set_type(C->top(), Type::TOP);
+ }
+ nn = C->top();
+ }
+ if( !n->is_Con() ) {
+ if( t != Type::TOP ) {
+ nn = makecon(t); // ConNode::make(t);
+ NOT_PRODUCT( inc_constants(); )
+ } else if( n->is_Region() ) { // Unreachable region
+ // Note: nn == C->top()
+ n->set_req(0, NULL); // Cut self-reference
+ // Eagerly remove dead phis to avoid creating phi copies.
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* m = n->out(i);
+ if( m->is_Phi() ) {
+ assert(type(m) == Type::TOP, "Unreachable region should not have live phis.");
+ add_users_to_worklist(m);
+ hash_delete(m); // Yank from hash before hacking edges
+ subsume_node(m, nn);
+ --i; // deleted this phi; rescan starting with next position
+ }
+ }
+ }
+ add_users_to_worklist(n); // Users of about-to-be-constant 'n'
+ hash_delete(n); // Removed 'n' from table before subsuming it
+ subsume_node(n,nn); // Update DefUse edges for new constant
+ }
+ return nn;
+ }
+
+ // If x is a TypeNode, capture any more-precise type permanently into Node
+ if (t != n->bottom_type()) {
+ hash_delete(n); // changing bottom type may force a rehash
+ n->raise_bottom_type(t);
+ _worklist.push(n); // n re-enters the hash table via the worklist
+ }
+
+ // Idealize graph using DU info. Must clone() into new-space.
+ // DU info is generally used to show profitability, progress or safety
+ // (but generally not needed for correctness).
+ Node *nn = n->Ideal_DU_postCCP(this);
+
+ // TEMPORARY fix to ensure that 2nd GVN pass eliminates NULL checks
+ switch( n->Opcode() ) {
+ case Op_FastLock: // Revisit FastLocks for lock coarsening
+ case Op_If:
+ case Op_CountedLoopEnd:
+ case Op_Region:
+ case Op_Loop:
+ case Op_CountedLoop:
+ case Op_Conv2B:
+ case Op_Opaque1:
+ case Op_Opaque2:
+ _worklist.push(n);
+ break;
+ default:
+ break;
+ }
+ if( nn ) {
+ _worklist.push(n);
+ // Put users of 'n' onto worklist for second igvn transform
+ add_users_to_worklist(n);
+ return nn;
+ }
+
+ return n;
+}
+
+//---------------------------------saturate------------------------------------
+const Type* PhaseCCP::saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const {
+ const Type* wide_type = new_type->widen(old_type);
+ if (wide_type != new_type) { // did we widen?
+ // If so, we may have widened beyond the limit type. Clip it back down.
+ new_type = wide_type->filter(limit_type);
+ }
+ return new_type;
+}
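+
+// Illustrative example: if old_type is int:0..10 and new_type is int:0..11,
+// widen() may jump well past 11 rather than chase a slowly growing bound one
+// step at a time; filter(limit_type) then clips the widened range back to
+// whatever bound the caller supplied (for instance a known loop limit), so
+// the shortcut never discards information the limit already guarantees.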
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void PhaseCCP::print_statistics() {
+ tty->print_cr("CCP: %d constants found: %d", _total_invokes, _total_constants);
+}
+#endif
+
+
+//=============================================================================
+#ifndef PRODUCT
+uint PhasePeephole::_total_peepholes = 0;
+#endif
+//------------------------------PhasePeephole----------------------------------
+// Apply peephole optimizations to machine instructions after register allocation
+PhasePeephole::PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg )
+ : PhaseTransform(Peephole), _regalloc(regalloc), _cfg(cfg) {
+ NOT_PRODUCT( clear_peepholes(); )
+}
+
+#ifndef PRODUCT
+//------------------------------~PhasePeephole---------------------------------
+PhasePeephole::~PhasePeephole() {
+ _total_peepholes += count_peepholes();
+}
+#endif
+
+//------------------------------transform--------------------------------------
+Node *PhasePeephole::transform( Node *n ) {
+ ShouldNotCallThis();
+ return NULL;
+}
+
+//------------------------------do_transform-----------------------------------
+void PhasePeephole::do_transform() {
+ bool method_name_not_printed = true;
+
+ // Examine each basic block
+ for( uint block_number = 1; block_number < _cfg._num_blocks; ++block_number ) {
+ Block *block = _cfg._blocks[block_number];
+ bool block_not_printed = true;
+
+ // and each instruction within a block
+ uint end_index = block->_nodes.size();
+ // block->end_idx() not valid after PhaseRegAlloc
+ for( uint instruction_index = 1; instruction_index < end_index; ++instruction_index ) {
+ Node *n = block->_nodes.at(instruction_index);
+ if( n->is_Mach() ) {
+ MachNode *m = n->as_Mach();
+ int deleted_count = 0;
+ // check for peephole opportunities
+ MachNode *m2 = m->peephole( block, instruction_index, _regalloc, deleted_count, C );
+ if( m2 != NULL ) {
+#ifndef PRODUCT
+ if( PrintOptoPeephole ) {
+ // Print method, first time only
+ if( C->method() && method_name_not_printed ) {
+ C->method()->print_short_name(); tty->cr();
+ method_name_not_printed = false;
+ }
+ // Print this block
+ if( Verbose && block_not_printed) {
+ tty->print_cr("in block");
+ block->dump();
+ block_not_printed = false;
+ }
+ // Print instructions being deleted
+ for( int i = (deleted_count - 1); i >= 0; --i ) {
+ block->_nodes.at(instruction_index-i)->as_Mach()->format(_regalloc); tty->cr();
+ }
+ tty->print_cr("replaced with");
+ // Print new instruction
+ m2->format(_regalloc);
+ tty->print("\n\n");
+ }
+#endif
+ // Remove old nodes from basic block and update instruction_index
+ // (old nodes still exist and may have edges pointing to them
+ // as register allocation info is stored in the allocator using
+ // the node index to live range mappings.)
+ uint safe_instruction_index = (instruction_index - deleted_count);
+ for( ; (instruction_index > safe_instruction_index); --instruction_index ) {
+ block->_nodes.remove( instruction_index );
+ }
+ // install new node after safe_instruction_index
+ block->_nodes.insert( safe_instruction_index + 1, m2 );
+ end_index = block->_nodes.size() - 1; // Recompute new block size
+ NOT_PRODUCT( inc_peepholes(); )
+ }
+ }
+ }
+ }
+}
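+
+// Worked example of the index bookkeeping above (illustrative numbers): if a
+// peephole rule matched at instruction_index == 7 and replaced two
+// instructions (deleted_count == 2), then safe_instruction_index == 5, the
+// nodes at block indices 7 and 6 are removed, and m2 is inserted at index 6,
+// exactly where the matched pair began; end_index is then recomputed.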
+
+//------------------------------print_statistics-------------------------------
+#ifndef PRODUCT
+void PhasePeephole::print_statistics() {
+ tty->print_cr("Peephole: peephole rules applied: %d", _total_peepholes);
+}
+#endif
+
+
+//=============================================================================
+//------------------------------set_req_X--------------------------------------
+void Node::set_req_X( uint i, Node *n, PhaseIterGVN *igvn ) {
+ assert( is_not_dead(n), "can not use dead node");
+ assert( igvn->hash_find(this) != this, "Need to remove from hash before changing edges" );
+ Node *old = in(i);
+ set_req(i, n);
+
+ // old goes dead?
+ if( old ) {
+ switch (old->outcnt()) {
+ case 0: // Kill all his inputs, and recursively kill other dead nodes.
+ if (!old->is_top())
+ igvn->remove_dead_node( old );
+ break;
+ case 1:
+ if( old->is_Store() || old->has_special_unique_user() )
+ igvn->add_users_to_worklist( old );
+ break;
+ case 2:
+ if( old->is_Store() )
+ igvn->add_users_to_worklist( old );
+ if( old->Opcode() == Op_Region )
+ igvn->_worklist.push(old);
+ break;
+ case 3:
+ if( old->Opcode() == Op_Region ) {
+ igvn->_worklist.push(old);
+ igvn->add_users_to_worklist( old );
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+}
+
+//-------------------------------replace_by-----------------------------------
+// Using def-use info, replace one node for another. Follow the def-use info
+// to all users of the OLD node. Then make all uses point to the NEW node.
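+// Note that only the use edges are rewired: the OLD node keeps its own inputs,
+// so callers that want it completely gone typically follow this with
+// disconnect_inputs() (see e.g. the junk-Phi removal in
+// post_allocate_copy_removal) or hand it to the IGVN dead-node machinery.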
+void Node::replace_by(Node *new_node) {
+ assert(!is_top(), "top node has no DU info");
+ for (DUIterator_Last imin, i = last_outs(imin); i >= imin; ) {
+ Node* use = last_out(i);
+ uint uses_found = 0;
+ for (uint j = 0; j < use->len(); j++) {
+ if (use->in(j) == this) {
+ if (j < use->req())
+ use->set_req(j, new_node);
+ else use->set_prec(j, new_node);
+ uses_found++;
+ }
+ }
+ i -= uses_found; // we deleted 1 or more copies of this edge
+ }
+}
+
+//=============================================================================
+//-----------------------------------------------------------------------------
+void Type_Array::grow( uint i ) {
+ if( !_max ) {
+ _max = 1;
+ _types = (const Type**)_a->Amalloc( _max * sizeof(Type*) );
+ _types[0] = NULL;
+ }
+ uint old = _max;
+ while( i >= _max ) _max <<= 1; // Double to fit
+ _types = (const Type**)_a->Arealloc( _types, old*sizeof(Type*),_max*sizeof(Type*));
+ memset( &_types[old], 0, (_max-old)*sizeof(Type*) );
+}
+
+//------------------------------dump-------------------------------------------
+#ifndef PRODUCT
+void Type_Array::dump() const {
+ uint max = Size();
+ for( uint i = 0; i < max; i++ ) {
+ if( _types[i] != NULL ) {
+ tty->print(" %d\t== ", i); _types[i]->dump(); tty->cr();
+ }
+ }
+}
+#endif
diff --git a/src/share/vm/opto/phaseX.hpp b/src/share/vm/opto/phaseX.hpp
new file mode 100644
index 000000000..46439c91e
--- /dev/null
+++ b/src/share/vm/opto/phaseX.hpp
@@ -0,0 +1,516 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Compile;
+class ConINode;
+class ConLNode;
+class Node;
+class Type;
+class PhaseTransform;
+class PhaseGVN;
+class PhaseIterGVN;
+class PhaseCCP;
+class PhasePeephole;
+class PhaseRegAlloc;
+
+
+//-----------------------------------------------------------------------------
+// Expandable closed hash-table of nodes, initialized to NULL.
+// Note that the constructor just zeros things
+// Storage is reclaimed when the Arena's lifetime is over.
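+// Usage sketch: value numbering probes with hash_find_insert(n); if an
+// equivalent node is already in the table it is returned and n can be
+// discarded, otherwise n is inserted. hash_delete() does not shrink the
+// table; it overwrites the slot with the _sentinel node so that probe
+// sequences through that slot still work.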
+class NodeHash : public StackObj {
+protected:
+ Arena *_a; // Arena to allocate in
+ uint _max; // Size of table (power of 2)
+ uint _inserts; // For grow and debug, count of hash_inserts
+ uint _insert_limit; // 'grow' when _inserts reaches _insert_limit
+ Node **_table; // Hash table of Node pointers
+ Node *_sentinel; // Replaces deleted entries in hash table
+
+public:
+ NodeHash(uint est_max_size);
+ NodeHash(Arena *arena, uint est_max_size);
+ NodeHash(NodeHash *use_this_state);
+#ifdef ASSERT
+ ~NodeHash(); // Unlock all nodes upon destruction of table.
+ void operator=(const NodeHash&); // Unlock all nodes upon replacement of table.
+#endif
+ Node *hash_find(const Node*);// Find an equivalent version in hash table
+ Node *hash_find_insert(Node*);// If not in table insert else return found node
+ void hash_insert(Node*); // Insert into hash table
+ bool hash_delete(const Node*);// Replace with _sentinel in hash table
+ void check_grow() {
+ _inserts++;
+ if( _inserts == _insert_limit ) { grow(); }
+ assert( _inserts <= _insert_limit, "hash table overflow");
+ assert( _inserts < _max, "hash table overflow" );
+ }
+ static uint round_up(uint); // Round up to nearest power of 2
+ void grow(); // Grow _table to next power of 2 and rehash
+ // Return 75% of _max, rounded up.
+ uint insert_limit() const { return _max - (_max>>2); }
+
+ void clear(); // Set all entries to NULL, keep storage.
+ // Size of hash table
+ uint size() const { return _max; }
+ // Return Node* at index in table
+ Node *at(uint table_index) {
+ assert(table_index < _max, "Must be within table");
+ return _table[table_index];
+ }
+
+ void remove_useless_nodes(VectorSet &useful); // replace with sentinel
+
+ Node *sentinel() { return _sentinel; }
+
+#ifndef PRODUCT
+ Node *find_index(uint idx); // For debugging
+ void dump(); // For debugging, dump statistics
+#endif
+ uint _grows; // For debugging, count of table grow()s
+ uint _look_probes; // For debugging, count of hash probes
+ uint _lookup_hits; // For debugging, count of hash_finds
+ uint _lookup_misses; // For debugging, count of hash_finds
+ uint _insert_probes; // For debugging, count of hash probes
+ uint _delete_probes; // For debugging, count of hash probes for deletes
+ uint _delete_hits; // For debugging, count of hash probes for deletes
+ uint _delete_misses; // For debugging, count of hash probes for deletes
+ uint _total_inserts; // For debugging, total inserts into hash table
+ uint _total_insert_probes; // For debugging, total probes while inserting
+};
+
+
+//-----------------------------------------------------------------------------
+// Map dense integer indices to Types. Uses classic doubling-array trick.
+// Abstractly provides an infinite array of Type*'s, initialized to NULL.
+// Note that the constructor just zeros things, and since I use Arena
+// allocation I do not need a destructor to reclaim storage.
+// Despite the general name, this class is customized for use by PhaseTransform.
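+// Example (sketch): with _max == 4, a call to map(9, t) doubles the backing
+// array to 16 entries, NULL-fills the new slots, and then stores t at index 9.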
+class Type_Array : public StackObj {
+ Arena *_a; // Arena to allocate in
+ uint _max;
+ const Type **_types;
+ void grow( uint i ); // Grow array node to fit
+ const Type *operator[] ( uint i ) const // Lookup, or NULL for not mapped
+ { return (i<_max) ? _types[i] : (Type*)NULL; }
+ friend class PhaseTransform;
+public:
+ Type_Array(Arena *a) : _a(a), _max(0), _types(0) {}
+ Type_Array(Type_Array *ta) : _a(ta->_a), _max(ta->_max), _types(ta->_types) { }
+ const Type *fast_lookup(uint i) const{assert(i<_max,"oob");return _types[i];}
+ // Extend the mapping: index i maps to Type *n.
+ void map( uint i, const Type *n ) { if( i>=_max ) grow(i); _types[i] = n; }
+ uint Size() const { return _max; }
+#ifndef PRODUCT
+ void dump() const;
+#endif
+};
+
+
+//------------------------------PhaseRemoveUseless-----------------------------
+// Remove useless nodes from GVN hash-table, worklist, and graph
+class PhaseRemoveUseless : public Phase {
+protected:
+ Unique_Node_List _useful; // Nodes reachable from root
+ // list is allocated from current resource area
+public:
+ PhaseRemoveUseless( PhaseGVN *gvn, Unique_Node_List *worklist );
+
+ Unique_Node_List *get_useful() { return &_useful; }
+};
+
+
+//------------------------------PhaseTransform---------------------------------
+// Phases that analyze, then transform. Constructing the Phase object does any
+// global or slow analysis. The results are cached later for a fast
+// transformation pass. When the Phase object is deleted the cached analysis
+// results are deleted.
+class PhaseTransform : public Phase {
+protected:
+ Arena* _arena;
+ Node_Array _nodes; // Map old node indices to new nodes.
+ Type_Array _types; // Map old node indices to Types.
+
+ // ConNode caches:
+ enum { _icon_min = -1 * HeapWordSize,
+ _icon_max = 16 * HeapWordSize,
+ _lcon_min = _icon_min,
+ _lcon_max = _icon_max,
+ _zcon_max = (uint)T_CONFLICT
+ };
+ ConINode* _icons[_icon_max - _icon_min + 1]; // cached jint constant nodes
+ ConLNode* _lcons[_lcon_max - _lcon_min + 1]; // cached jlong constant nodes
+ ConNode* _zcons[_zcon_max + 1]; // cached is_zero_type nodes
+ void init_con_caches();
+
+ // Support both int and long caches because either might be an intptr_t,
+ // so they show up frequently in address computations.
+
+public:
+ PhaseTransform( PhaseNumber pnum );
+ PhaseTransform( Arena *arena, PhaseNumber pnum );
+ PhaseTransform( PhaseTransform *phase, PhaseNumber pnum );
+
+ Arena* arena() { return _arena; }
+ Type_Array& types() { return _types; }
+ // _nodes is used in varying ways by subclasses, which define local accessors
+
+public:
+ // Get a previously recorded type for the node n.
+ // This type must already have been recorded.
+ // If you want the type of a very new (untransformed) node,
+ // you must use type_or_null, and test the result for NULL.
+ const Type* type(const Node* n) const {
+ const Type* t = _types.fast_lookup(n->_idx);
+ assert(t != NULL, "must set before get");
+ return t;
+ }
+ // Get a previously recorded type for the node n,
+ // or else return NULL if there is none.
+ const Type* type_or_null(const Node* n) const {
+ return _types.fast_lookup(n->_idx);
+ }
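+  // Example (sketch): for a node that may not have been transformed yet,
+  //   const Type* t = type_or_null(n);
+  //   if (t == NULL)  t = n->bottom_type();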
+ // Record a type for a node.
+ void set_type(const Node* n, const Type *t) {
+ assert(t != NULL, "type must not be null");
+ _types.map(n->_idx, t);
+ }
+ // Record an initial type for a node, the node's bottom type.
+ void set_type_bottom(const Node* n) {
+ // Use this for initialization when bottom_type() (or better) is not handy.
+    // Usually the initialization should be to n->Value(this) instead,
+ // or a hand-optimized value like Type::MEMORY or Type::CONTROL.
+ assert(_types[n->_idx] == NULL, "must set the initial type just once");
+ _types.map(n->_idx, n->bottom_type());
+ }
+ // Make sure the types array is big enough to record a size for the node n.
+ // (In product builds, we never want to do range checks on the types array!)
+ void ensure_type_or_null(const Node* n) {
+ if (n->_idx >= _types.Size())
+ _types.map(n->_idx, NULL); // Grow the types array as needed.
+ }
+
+ // Utility functions:
+ const TypeInt* find_int_type( Node* n);
+ const TypeLong* find_long_type(Node* n);
+ jint find_int_con( Node* n, jint value_if_unknown) {
+ const TypeInt* t = find_int_type(n);
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
+ jlong find_long_con(Node* n, jlong value_if_unknown) {
+ const TypeLong* t = find_long_type(n);
+ return (t != NULL && t->is_con()) ? t->get_con() : value_if_unknown;
+ }
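+  // Example (sketch, with an illustrative node 'count'):
+  //   jint shift = find_int_con(count, -1);   // -1 here means "not a known constant"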
+
+ // Make an idealized constant, i.e., one of ConINode, ConPNode, ConFNode, etc.
+ // Same as transform(ConNode::make(t)).
+ ConNode* makecon(const Type* t);
+ virtual ConNode* uncached_makecon(const Type* t) // override in PhaseValues
+ { ShouldNotCallThis(); return NULL; }
+
+ // Fast int or long constant. Same as TypeInt::make(i) or TypeLong::make(l).
+ ConINode* intcon(jint i);
+ ConLNode* longcon(jlong l);
+
+ // Fast zero or null constant. Same as makecon(Type::get_zero_type(bt)).
+ ConNode* zerocon(BasicType bt);
+
+ // Return a node which computes the same function as this node, but
+ // in a faster or cheaper fashion.
+ virtual Node *transform( Node *n ) = 0;
+
+ // Return whether two Nodes are equivalent.
+ // Must not be recursive, since the recursive version is built from this.
+ // For pessimistic optimizations this is simply pointer equivalence.
+ bool eqv(const Node* n1, const Node* n2) const { return n1 == n2; }
+
+ // Return whether two Nodes are equivalent, after stripping casting.
+ bool eqv_uncast(const Node* n1, const Node* n2) const {
+ return eqv(n1->uncast(), n2->uncast());
+ }
+
+ // For pessimistic passes, the return type must monotonically narrow.
+ // For optimistic passes, the return type must monotonically widen.
+ // It is possible to get into a "death march" in either type of pass,
+ // where the types are continually moving but it will take 2**31 or
+ // more steps to converge. This doesn't happen on most normal loops.
+ //
+ // Here is an example of a deadly loop for an optimistic pass, along
+ // with a partial trace of inferred types:
+ // x = phi(0,x'); L: x' = x+1; if (x' >= 0) goto L;
+ // 0 1 join([0..max], 1)
+ // [0..1] [1..2] join([0..max], [1..2])
+ // [0..2] [1..3] join([0..max], [1..3])
+ // ... ... ...
+ // [0..max] [min]u[1..max] join([0..max], [min..max])
+ // [0..max] ==> fixpoint
+ // We would have proven, the hard way, that the iteration space is all
+ // non-negative ints, with the loop terminating due to 32-bit overflow.
+ //
+ // Here is the corresponding example for a pessimistic pass:
+ // x = phi(0,x'); L: x' = x-1; if (x' >= 0) goto L;
+ // int int join([0..max], int)
+ // [0..max] [-1..max-1] join([0..max], [-1..max-1])
+ // [0..max-1] [-1..max-2] join([0..max], [-1..max-2])
+ // ... ... ...
+ // [0..1] [-1..0] join([0..max], [-1..0])
+ // 0 -1 join([0..max], -1)
+ // 0 == fixpoint
+ // We would have proven, the hard way, that the iteration space is {0}.
+ // (Usually, other optimizations will make the "if (x >= 0)" fold up
+ // before we get into trouble. But not always.)
+ //
+ // It's a pleasant thing to observe that the pessimistic pass
+ // will make short work of the optimistic pass's deadly loop,
+ // and vice versa. That is a good example of the complementary
+ // purposes of the CCP (optimistic) vs. GVN (pessimistic) phases.
+ //
+ // In any case, only widen or narrow a few times before going to the
+ // correct flavor of top or bottom.
+ //
+ // This call only needs to be made once as the data flows around any
+ // given cycle. We do it at Phis, and nowhere else.
+ // The types presented are the new type of a phi (computed by PhiNode::Value)
+ // and the previously computed type, last time the phi was visited.
+ //
+  // The third argument is the upper limit for the saturated value,
+ // if the phase wishes to widen the new_type.
+ // If the phase is narrowing, the old type provides a lower limit.
+ // Caller guarantees that old_type and new_type are no higher than limit_type.
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const
+ { ShouldNotCallThis(); return NULL; }
+
+#ifndef PRODUCT
+ void dump_old2new_map() const;
+ void dump_new( uint new_lidx ) const;
+ void dump_types() const;
+ void dump_nodes_and_types(const Node *root, uint depth, bool only_ctrl = true);
+ void dump_nodes_and_types_recur( const Node *n, uint depth, bool only_ctrl, VectorSet &visited);
+
+ uint _count_progress; // For profiling, count transforms that make progress
+  void set_progress() { ++_count_progress; assert( allow_progress(),"No progress allowed during verification"); }
+ void clear_progress() { _count_progress = 0; }
+ uint made_progress() const { return _count_progress; }
+
+ uint _count_transforms; // For profiling, count transforms performed
+ void set_transforms() { ++_count_transforms; }
+ void clear_transforms() { _count_transforms = 0; }
+ uint made_transforms() const{ return _count_transforms; }
+
+ bool _allow_progress; // progress not allowed during verification pass
+ void set_allow_progress(bool allow) { _allow_progress = allow; }
+ bool allow_progress() { return _allow_progress; }
+#endif
+};
+
+//------------------------------PhaseValues------------------------------------
+// Phase infrastructure to support values
+class PhaseValues : public PhaseTransform {
+protected:
+ NodeHash _table; // Hash table for value-numbering
+
+public:
+ PhaseValues( Arena *arena, uint est_max_size );
+ PhaseValues( PhaseValues *pt );
+ PhaseValues( PhaseValues *ptv, const char *dummy );
+ NOT_PRODUCT( ~PhaseValues(); )
+ virtual PhaseIterGVN *is_IterGVN() { return 0; }
+
+ // Some Ideal and other transforms delete --> modify --> insert values
+ bool hash_delete(Node *n) { return _table.hash_delete(n); }
+ void hash_insert(Node *n) { _table.hash_insert(n); }
+ Node *hash_find_insert(Node *n){ return _table.hash_find_insert(n); }
+ Node *hash_find(const Node *n) { return _table.hash_find(n); }
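+  // Sketch of that pattern for a node n whose inputs are about to change:
+  //   hash_delete(n);          // take n out before its edges (and hash) change
+  //   n->set_req(1, new_in);   // modify
+  //   hash_insert(n);          // re-insert under the new hash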
+
+ // Used after parsing to eliminate values that are no longer in program
+ void remove_useless_nodes(VectorSet &useful) { _table.remove_useless_nodes(useful); }
+
+ virtual ConNode* uncached_makecon(const Type* t); // override from PhaseTransform
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const
+ { return new_type; }
+
+#ifndef PRODUCT
+ uint _count_new_values; // For profiling, count new values produced
+ void inc_new_values() { ++_count_new_values; }
+ void clear_new_values() { _count_new_values = 0; }
+ uint made_new_values() const { return _count_new_values; }
+#endif
+};
+
+
+//------------------------------PhaseGVN---------------------------------------
+// Phase for performing local, pessimistic GVN-style optimizations.
+class PhaseGVN : public PhaseValues {
+public:
+ PhaseGVN( Arena *arena, uint est_max_size ) : PhaseValues( arena, est_max_size ) {}
+ PhaseGVN( PhaseGVN *gvn ) : PhaseValues( gvn ) {}
+ PhaseGVN( PhaseGVN *gvn, const char *dummy ) : PhaseValues( gvn, dummy ) {}
+
+ // Return a node which computes the same function as this node, but
+ // in a faster or cheaper fashion.
+ Node *transform( Node *n );
+ Node *transform_no_reclaim( Node *n );
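+  // Example (sketch): a freshly constructed ideal node is normally passed
+  // through transform() before being used as an input elsewhere, e.g.
+  //   Node* sum = gvn.transform(raw_add);   // 'raw_add' is a just-built add node
+  // which idealizes, types and value-numbers it (possibly returning an
+  // existing equivalent node instead).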
+
+ // Check for a simple dead loop when a data node references itself.
+ DEBUG_ONLY(void dead_loop_check(Node *n);)
+};
+
+//------------------------------PhaseIterGVN-----------------------------------
+// Phase for iteratively performing local, pessimistic GVN-style optimizations
+// and ideal transformations on the graph.
+class PhaseIterGVN : public PhaseGVN {
+ // Idealize old Node 'n' with respect to its inputs and its value
+ virtual Node *transform_old( Node *a_node );
+protected:
+
+ // Idealize new Node 'n' with respect to its inputs and its value
+ virtual Node *transform( Node *a_node );
+
+ // Warm up hash table, type table and initial worklist
+ void init_worklist( Node *a_root );
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const;
+ // Usually returns new_type. Returns old_type if new_type is only a slight
+ // improvement, such that it would take many (>>10) steps to reach 2**32.
+
+public:
+ PhaseIterGVN( PhaseIterGVN *igvn ); // Used by CCP constructor
+ PhaseIterGVN( PhaseGVN *gvn ); // Used after Parser
+ PhaseIterGVN( PhaseIterGVN *igvn, const char *dummy ); // Used after +VerifyOpto
+
+ virtual PhaseIterGVN *is_IterGVN() { return this; }
+
+ Unique_Node_List _worklist; // Iterative worklist
+
+ // Given def-use info and an initial worklist, apply Node::Ideal,
+ // Node::Value, Node::Identity, hash-based value numbering, Node::Ideal_DU
+ // and dominator info to a fixed point.
+ void optimize();
+
+ // Register a new node with the iter GVN pass without transforming it.
+ // Used when we need to restructure a Region/Phi area and all the Regions
+ // and Phis need to complete this one big transform before any other
+ // transforms can be triggered on the region.
+ // Optional 'orig' is an earlier version of this node.
+ // It is significant only for debugging and profiling.
+ Node* register_new_node_with_optimizer(Node* n, Node* orig = NULL);
+
+ // Kill a globally dead Node. It is allowed to have uses which are
+ // assumed dead and left 'in limbo'.
+ void remove_globally_dead_node( Node *dead );
+
+ // Kill all inputs to a dead node, recursively making more dead nodes.
+ // The Node must be dead locally, i.e., have no uses.
+ void remove_dead_node( Node *dead ) {
+ assert(dead->outcnt() == 0 && !dead->is_top(), "node must be dead");
+ remove_globally_dead_node(dead);
+ }
+
+ // Subsume users of node 'old' into node 'nn'
+ // If no Def-Use info existed for 'nn' it will after call.
+ void subsume_node( Node *old, Node *nn );
+
+ // Add users of 'n' to worklist
+ void add_users_to_worklist0( Node *n );
+ void add_users_to_worklist ( Node *n );
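+  // Typical client pattern (sketch): after rewiring edges around a node n,
+  //   add_users_to_worklist(n);   // dependent nodes get revisited
+  //   _worklist.push(n);          // and so does n itself, by optimize()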
+
+#ifndef PRODUCT
+protected:
+ // Sub-quadratic implementation of VerifyIterativeGVN.
+ unsigned long _verify_counter;
+ unsigned long _verify_full_passes;
+ enum { _verify_window_size = 30 };
+ Node* _verify_window[_verify_window_size];
+ void verify_step(Node* n);
+#endif
+};
+
+//------------------------------PhaseCCP---------------------------------------
+// Phase for performing global Conditional Constant Propagation.
+// Should be replaced with combined CCP & GVN someday.
+class PhaseCCP : public PhaseIterGVN {
+ // Non-recursive. Use analysis to transform single Node.
+ virtual Node *transform_once( Node *n );
+
+public:
+ PhaseCCP( PhaseIterGVN *igvn ); // Compute conditional constants
+ NOT_PRODUCT( ~PhaseCCP(); )
+
+ // Worklist algorithm identifies constants
+ void analyze();
+  // Recursive traversal of program. Uses analysis to modify program.
+ virtual Node *transform( Node *n );
+ // Do any transformation after analysis
+ void do_transform();
+
+ virtual const Type* saturate(const Type* new_type, const Type* old_type,
+ const Type* limit_type) const;
+ // Returns new_type->widen(old_type), which increments the widen bits until
+ // giving up with TypeInt::INT or TypeLong::LONG.
+ // Result is clipped to limit_type if necessary.
+
+#ifndef PRODUCT
+ static uint _total_invokes; // For profiling, count invocations
+ void inc_invokes() { ++PhaseCCP::_total_invokes; }
+
+ static uint _total_constants; // For profiling, count constants found
+ uint _count_constants;
+ void clear_constants() { _count_constants = 0; }
+ void inc_constants() { ++_count_constants; }
+ uint count_constants() const { return _count_constants; }
+
+ static void print_statistics();
+#endif
+};
+
+
+//------------------------------PhasePeephole----------------------------------
+// Phase for performing peephole optimizations on register allocated basic blocks.
+class PhasePeephole : public PhaseTransform {
+ PhaseRegAlloc *_regalloc;
+ PhaseCFG &_cfg;
+  // Recursive traversal of program. The pure transform() function is unused in this phase
+ virtual Node *transform( Node *n );
+
+public:
+ PhasePeephole( PhaseRegAlloc *regalloc, PhaseCFG &cfg );
+ NOT_PRODUCT( ~PhasePeephole(); )
+
+ // Do any transformation after analysis
+ void do_transform();
+
+#ifndef PRODUCT
+ static uint _total_peepholes; // For profiling, count peephole rules applied
+ uint _count_peepholes;
+ void clear_peepholes() { _count_peepholes = 0; }
+ void inc_peepholes() { ++_count_peepholes; }
+ uint count_peepholes() const { return _count_peepholes; }
+
+ static void print_statistics();
+#endif
+};
diff --git a/src/share/vm/opto/postaloc.cpp b/src/share/vm/opto/postaloc.cpp
new file mode 100644
index 000000000..35b469bae
--- /dev/null
+++ b/src/share/vm/opto/postaloc.cpp
@@ -0,0 +1,584 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_postaloc.cpp.incl"
+
+// See if this register kind does not require two registers
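+// (e.g. Op_RegI and Op_RegF use one register, while Op_RegL, Op_RegD,
+// and on LP64 Op_RegP, occupy a register pair)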
+static bool is_single_register(uint x) {
+#ifdef _LP64
+ return (x != Op_RegD && x != Op_RegL && x != Op_RegP);
+#else
+ return (x != Op_RegD && x != Op_RegL);
+#endif
+}
+
+//------------------------------may_be_copy_of_callee-----------------------------
+// Check to see if we can possibly be a copy of a callee-save value.
+bool PhaseChaitin::may_be_copy_of_callee( Node *def ) const {
+ // Short circuit if there are no callee save registers
+ if (_matcher.number_of_saved_registers() == 0) return false;
+
+ // Expect only a spill-down and reload on exit for callee-save spills.
+ // Chains of copies cannot be deep.
+ // 5008997 - This is wishful thinking. Register allocator seems to
+ // be splitting live ranges for callee save registers to such
+ // an extent that in large methods the chains can be very long
+ // (50+). The conservative answer is to return true if we don't
+  // know as this prevents optimizations from occurring.
+
+ const int limit = 60;
+ int i;
+ for( i=0; i < limit; i++ ) {
+ if( def->is_Proj() && def->in(0)->is_Start() &&
+ _matcher.is_save_on_entry(lrgs(n2lidx(def)).reg()) )
+ return true; // Direct use of callee-save proj
+ if( def->is_Copy() ) // Copies carry value through
+ def = def->in(def->is_Copy());
+ else if( def->is_Phi() ) // Phis can merge it from any direction
+ def = def->in(1);
+ else
+ break;
+ guarantee(def != NULL, "must not resurrect dead copy");
+ }
+ // If we reached the end and didn't find a callee save proj
+ // then this may be a callee save proj so we return true
+  // as the conservative answer. If we didn't reach the end
+ // we must have discovered that it was not a callee save
+ // else we would have returned.
+ return i == limit;
+}
+
+
+
+//------------------------------yank_if_dead-----------------------------------
+// Removed an edge from 'old'. Yank if dead. Return adjustment counts to
+// iterators in the current block.
+int PhaseChaitin::yank_if_dead( Node *old, Block *current_block, Node_List *value, Node_List *regnd ) {
+ int blk_adjust=0;
+ while (old->outcnt() == 0 && old != C->top()) {
+ Block *oldb = _cfg._bbs[old->_idx];
+ oldb->find_remove(old);
+ // Count 1 if deleting an instruction from the current block
+ if( oldb == current_block ) blk_adjust++;
+ _cfg._bbs.map(old->_idx,NULL);
+ OptoReg::Name old_reg = lrgs(n2lidx(old)).reg();
+ if( regnd && (*regnd)[old_reg]==old ) { // Instruction is currently available?
+ value->map(old_reg,NULL); // Yank from value/regnd maps
+ regnd->map(old_reg,NULL); // This register's value is now unknown
+ }
+ Node *tmp = old->req() > 1 ? old->in(1) : NULL;
+ old->disconnect_inputs(NULL);
+ if( !tmp ) break;
+ old = tmp;
+ }
+ return blk_adjust;
+}
+
+//------------------------------use_prior_register-----------------------------
+// Use the prior value instead of the current value, in an effort to make
+// the current value go dead. Return block iterator adjustment, in case
+// we yank some instructions from this block.
+int PhaseChaitin::use_prior_register( Node *n, uint idx, Node *def, Block *current_block, Node_List &value, Node_List &regnd ) {
+ // No effect?
+ if( def == n->in(idx) ) return 0;
+ // Def is currently dead and can be removed? Do not resurrect
+ if( def->outcnt() == 0 ) return 0;
+
+  // Not every pair of physical registers is assignment compatible,
+  // e.g. on sparc, floating point registers are not assignable to integer
+  // registers.
+ const LRG &def_lrg = lrgs(n2lidx(def));
+ OptoReg::Name def_reg = def_lrg.reg();
+ const RegMask &use_mask = n->in_RegMask(idx);
+ bool can_use = ( RegMask::can_represent(def_reg) ? (use_mask.Member(def_reg) != 0)
+ : (use_mask.is_AllStack() != 0));
+ // Check for a copy to or from a misaligned pair.
+ can_use = can_use && !use_mask.is_misaligned_Pair() && !def_lrg.mask().is_misaligned_Pair();
+
+ if (!can_use)
+ return 0;
+
+ // Capture the old def in case it goes dead...
+ Node *old = n->in(idx);
+
+ // Save-on-call copies can only be elided if the entire copy chain can go
+ // away, lest we get the same callee-save value alive in 2 locations at
+ // once. We check for the obvious trivial case here. Although it can
+ // sometimes be elided with cooperation outside our scope, here we will just
+ // miss the opportunity. :-(
+ if( may_be_copy_of_callee(def) ) {
+    if( old->outcnt() > 1 ) return 0; // We're not the last user
+ int idx = old->is_Copy();
+ assert( idx, "chain of copies being removed" );
+ Node *old2 = old->in(idx); // Chain of copies
+ if( old2->outcnt() > 1 ) return 0; // old is not the last user
+ int idx2 = old2->is_Copy();
+ if( !idx2 ) return 0; // Not a chain of 2 copies
+ if( def != old2->in(idx2) ) return 0; // Chain of exactly 2 copies
+ }
+
+ // Use the new def
+ n->set_req(idx,def);
+ _post_alloc++;
+
+ // Is old def now dead? We successfully yanked a copy?
+ return yank_if_dead(old,current_block,&value,&regnd);
+}
+
+
+//------------------------------skip_copies------------------------------------
+// Skip through any number of copies (that don't mod oop-i-ness)
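+// Example (sketch): for c = SpillCopy(SpillCopy(def)), skip_copies(c) returns
+// def, stopping early only if some copy in the chain changes the live range's
+// oop-ness (a casting copy).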
+Node *PhaseChaitin::skip_copies( Node *c ) {
+ int idx = c->is_Copy();
+ uint is_oop = lrgs(n2lidx(c))._is_oop;
+ while (idx != 0) {
+ guarantee(c->in(idx) != NULL, "must not resurrect dead copy");
+ if (lrgs(n2lidx(c->in(idx)))._is_oop != is_oop)
+ break; // casting copy, not the same value
+ c = c->in(idx);
+ idx = c->is_Copy();
+ }
+ return c;
+}
+
+//------------------------------elide_copy-------------------------------------
+// Remove (bypass) copies along Node n, edge k.
+int PhaseChaitin::elide_copy( Node *n, int k, Block *current_block, Node_List &value, Node_List &regnd, bool can_change_regs ) {
+ int blk_adjust = 0;
+
+ uint nk_idx = n2lidx(n->in(k));
+ OptoReg::Name nk_reg = lrgs(nk_idx ).reg();
+
+ // Remove obvious same-register copies
+ Node *x = n->in(k);
+ int idx;
+ while( (idx=x->is_Copy()) != 0 ) {
+ Node *copy = x->in(idx);
+ guarantee(copy != NULL, "must not resurrect dead copy");
+ if( lrgs(n2lidx(copy)).reg() != nk_reg ) break;
+ blk_adjust += use_prior_register(n,k,copy,current_block,value,regnd);
+ if( n->in(k) != copy ) break; // Failed for some cutout?
+ x = copy; // Progress, try again
+ }
+
+ // Phis and 2-address instructions cannot change registers so easily - their
+ // outputs must match their input.
+ if( !can_change_regs )
+ return blk_adjust; // Only check stupid copies!
+
+ // Loop backedges won't have a value-mapping yet
+ if( &value == NULL ) return blk_adjust;
+
+ // Skip through all copies to the _value_ being used. Do not change from
+ // int to pointer. This attempts to jump through a chain of copies, where
+ // intermediate copies might be illegal, i.e., value is stored down to stack
+ // then reloaded BUT survives in a register the whole way.
+ Node *val = skip_copies(n->in(k));
+
+ if( val == x ) return blk_adjust; // No progress?
+
+ bool single = is_single_register(val->ideal_reg());
+ uint val_idx = n2lidx(val);
+ OptoReg::Name val_reg = lrgs(val_idx).reg();
+
+ // See if it happens to already be in the correct register!
+  // (either the Phi's direct register, or the common case of the
+  // never-clobbered original-def register)
+ if( value[val_reg] == val &&
+ // Doubles check both halves
+ ( single || value[val_reg-1] == val ) ) {
+ blk_adjust += use_prior_register(n,k,regnd[val_reg],current_block,value,regnd);
+ if( n->in(k) == regnd[val_reg] ) // Success! Quit trying
+ return blk_adjust;
+ }
+
+ // See if we can skip the copy by changing registers. Don't change from
+ // using a register to using the stack unless we know we can remove a
+ // copy-load. Otherwise we might end up making a pile of Intel cisc-spill
+ // ops reading from memory instead of just loading once and using the
+ // register.
+
+ // Also handle duplicate copies here.
+ const Type *t = val->is_Con() ? val->bottom_type() : NULL;
+
+ // Scan all registers to see if this value is around already
+ for( uint reg = 0; reg < (uint)_max_reg; reg++ ) {
+ Node *vv = value[reg];
+ if( !single ) { // Doubles check for aligned-adjacent pair
+ if( (reg&1)==0 ) continue; // Wrong half of a pair
+ if( vv != value[reg-1] ) continue; // Not a complete pair
+ }
+ if( vv == val || // Got a direct hit?
+ (t && vv && vv->bottom_type() == t && vv->is_Mach() &&
+ vv->as_Mach()->rule() == val->as_Mach()->rule()) ) { // Or same constant?
+ assert( !n->is_Phi(), "cannot change registers at a Phi so easily" );
+ if( OptoReg::is_stack(nk_reg) || // CISC-loading from stack OR
+ OptoReg::is_reg(reg) || // turning into a register use OR
+ regnd[reg]->outcnt()==1 ) { // last use of a spill-load turns into a CISC use
+ blk_adjust += use_prior_register(n,k,regnd[reg],current_block,value,regnd);
+ if( n->in(k) == regnd[reg] ) // Success! Quit trying
+ return blk_adjust;
+ } // End of if not degrading to a stack
+ } // End of if found value in another register
+ } // End of scan all machine registers
+ return blk_adjust;
+}
+
+
+//
+// Check if nreg already contains the constant value val. Normal copy
+// elimination doesn't work on constants because multiple
+// nodes can represent the same constant so the type and rule of the
+// MachNode must be checked to ensure equivalence.
+//
+bool PhaseChaitin::eliminate_copy_of_constant(Node* val, Block *current_block,
+ Node_List& value, Node_List& regnd,
+ OptoReg::Name nreg, OptoReg::Name nreg2) {
+ if (value[nreg] != val && val->is_Con() &&
+ value[nreg] != NULL && value[nreg]->is_Con() &&
+ (nreg2 == OptoReg::Bad || value[nreg] == value[nreg2]) &&
+ value[nreg]->bottom_type() == val->bottom_type() &&
+ value[nreg]->as_Mach()->rule() == val->as_Mach()->rule()) {
+ // This code assumes that two MachNodes representing constants
+ // which have the same rule and the same bottom type will produce
+ // identical effects into a register. This seems like it must be
+ // objectively true unless there are hidden inputs to the nodes
+    // but if that were to change this code would need to be updated.
+    // Since they are equivalent the second one is redundant and can
+ // be removed.
+ //
+ // val will be replaced with the old value but val might have
+ // kills projections associated with it so remove them now so that
+    // yank_if_dead will be able to eliminate the copy once the uses
+    // have been transferred to the old value.
+ for (DUIterator_Fast imax, i = val->fast_outs(imax); i < imax; i++) {
+ Node* use = val->fast_out(i);
+ if (use->is_Proj() && use->outcnt() == 0) {
+ // Kill projections have no users and one input
+ use->set_req(0, C->top());
+ yank_if_dead(use, current_block, &value, &regnd);
+ --i; --imax;
+ }
+ }
+ _post_alloc++;
+ return true;
+ }
+ return false;
+}
+
+
+//------------------------------post_allocate_copy_removal---------------------
+// Post-Allocation peephole copy removal. We do this in 1 pass over the
+// basic blocks. We maintain a mapping of registers to Nodes (an array of
+// Nodes indexed by machine register or stack slot number). NULL means that a
+// register is not mapped to any Node. We can (and want to!) have several
+// registers map to the same Node. We walk forward over the instructions
+// updating the mapping as we go. At merge points we force a NULL if we have
+// to merge 2 different Nodes into the same register. Phi functions will give
+// us a new Node if there is a proper value merging. Since the blocks are
+// arranged in some RPO, we will visit all parent blocks before visiting any
+// successor blocks (except at loops).
+//
+// If we find a Copy we look to see if the Copy's source register is a stack
+// slot and that value has already been loaded into some machine register; if
+// so we use the machine register directly. This turns a Load into a reg-reg
+// Move. We also look for reloads of identical constants.
+//
+// When we see a use from a reg-reg Copy, we will attempt to use the copy's
+// source directly and make the copy go dead.
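+//
+// Example (sketch): suppose value v was spilled to a stack slot but is also
+// still live in register R5. A later copy that reloads v from the stack slot
+// can have its use redirected to R5, turning the load into a reg-reg move, and
+// if the reloading copy then has no remaining uses it is yanked as dead.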
+void PhaseChaitin::post_allocate_copy_removal() {
+ NOT_PRODUCT( Compile::TracePhase t3("postAllocCopyRemoval", &_t_postAllocCopyRemoval, TimeCompiler); )
+ ResourceMark rm;
+
+  // Need a mapping from basic block to Node_Lists. We need a Node_List to
+  // map from register number to value-producing Node.
+ Node_List **blk2value = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
+ memset( blk2value, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+  // Need a mapping from basic block to Node_Lists. We need a Node_List to
+  // map from register number to register-defining Node.
+ Node_List **blk2regnd = NEW_RESOURCE_ARRAY( Node_List *, _cfg._num_blocks+1);
+ memset( blk2regnd, 0, sizeof(Node_List*)*(_cfg._num_blocks+1) );
+
+ // We keep unused Node_Lists on a free_list to avoid wasting
+ // memory.
+ GrowableArray<Node_List*> free_list = GrowableArray<Node_List*>(16);
+
+ // For all blocks
+ for( uint i = 0; i < _cfg._num_blocks; i++ ) {
+ uint j;
+ Block *b = _cfg._blocks[i];
+
+ // Count of Phis in block
+ uint phi_dex;
+ for( phi_dex = 1; phi_dex < b->_nodes.size(); phi_dex++ ) {
+ Node *phi = b->_nodes[phi_dex];
+ if( !phi->is_Phi() )
+ break;
+ }
+
+ // If any predecessor has not been visited, we do not know the state
+ // of registers at the start. Check for this, while updating copies
+ // along Phi input edges
+ bool missing_some_inputs = false;
+ Block *freed = NULL;
+ for( j = 1; j < b->num_preds(); j++ ) {
+ Block *pb = _cfg._bbs[b->pred(j)->_idx];
+ // Remove copies along phi edges
+ for( uint k=1; k<phi_dex; k++ )
+ elide_copy( b->_nodes[k], j, b, *blk2value[pb->_pre_order], *blk2regnd[pb->_pre_order], false );
+ if( blk2value[pb->_pre_order] ) { // Have a mapping on this edge?
+ // See if this predecessor's mappings have been used by everybody
+ // who wants them. If so, free 'em.
+ uint k;
+ for( k=0; k<pb->_num_succs; k++ ) {
+ Block *pbsucc = pb->_succs[k];
+ if( !blk2value[pbsucc->_pre_order] && pbsucc != b )
+ break; // Found a future user
+ }
+ if( k >= pb->_num_succs ) { // No more uses, free!
+ freed = pb; // Record last block freed
+ free_list.push(blk2value[pb->_pre_order]);
+ free_list.push(blk2regnd[pb->_pre_order]);
+ }
+ } else { // This block has unvisited (loopback) inputs
+ missing_some_inputs = true;
+ }
+ }
+
+
+ // Extract Node_List mappings. If 'freed' is non-zero, we just popped
+ // 'freed's blocks off the list
+ Node_List &regnd = *(free_list.is_empty() ? new Node_List() : free_list.pop());
+ Node_List &value = *(free_list.is_empty() ? new Node_List() : free_list.pop());
+ assert( !freed || blk2value[freed->_pre_order] == &value, "" );
+ value.map(_max_reg,NULL);
+ regnd.map(_max_reg,NULL);
+ // Set mappings as OUR mappings
+ blk2value[b->_pre_order] = &value;
+ blk2regnd[b->_pre_order] = &regnd;
+
+ // Initialize value & regnd for this block
+ if( missing_some_inputs ) {
+ // Some predecessor has not yet been visited; zap map to empty
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ value.map(k,NULL);
+ regnd.map(k,NULL);
+ }
+ } else {
+ if( !freed ) { // Didn't get a freebie prior block
+ // Must clone some data
+ freed = _cfg._bbs[b->pred(1)->_idx];
+ Node_List &f_value = *blk2value[freed->_pre_order];
+ Node_List &f_regnd = *blk2regnd[freed->_pre_order];
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ value.map(k,f_value[k]);
+ regnd.map(k,f_regnd[k]);
+ }
+ }
+ // Merge all inputs together, setting to NULL any conflicts.
+ for( j = 1; j < b->num_preds(); j++ ) {
+ Block *pb = _cfg._bbs[b->pred(j)->_idx];
+ if( pb == freed ) continue; // Did self already via freelist
+ Node_List &p_regnd = *blk2regnd[pb->_pre_order];
+ for( uint k = 0; k < (uint)_max_reg; k++ ) {
+ if( regnd[k] != p_regnd[k] ) { // Conflict on reaching defs?
+ value.map(k,NULL); // Then no value handy
+ regnd.map(k,NULL);
+ }
+ }
+ }
+ }
+
+ // For all Phi's
+ for( j = 1; j < phi_dex; j++ ) {
+ uint k;
+ Node *phi = b->_nodes[j];
+ uint pidx = n2lidx(phi);
+ OptoReg::Name preg = lrgs(n2lidx(phi)).reg();
+
+ // Remove copies remaining on edges. Check for junk phi.
+ Node *u = NULL;
+ for( k=1; k<phi->req(); k++ ) {
+ Node *x = phi->in(k);
+ if( phi != x && u != x ) // Found a different input
+ u = u ? NodeSentinel : x; // Capture unique input, or NodeSentinel for 2nd input
+ }
+ if( u != NodeSentinel ) { // Junk Phi. Remove
+ b->_nodes.remove(j--); phi_dex--;
+ _cfg._bbs.map(phi->_idx,NULL);
+ phi->replace_by(u);
+ phi->disconnect_inputs(NULL);
+ continue;
+ }
+ // Note that if value[pidx] exists, then we merged no new values here
+ // and the phi is useless. This can happen even with the above phi
+ // removal for complex flows. I cannot keep the better known value here
+ // because locally the phi appears to define a new merged value. If I
+ // keep the better value then a copy of the phi, being unable to use the
+ // global flow analysis, can't "peek through" the phi to the original
+ // reaching value and so will act like it's defining a new value. This
+ // can lead to situations where some uses are from the old and some from
+ // the new values. Not illegal by itself but throws the over-strong
+ // assert in scheduling.
+ if( pidx ) {
+ value.map(preg,phi);
+ regnd.map(preg,phi);
+ OptoReg::Name preg_lo = OptoReg::add(preg,-1);
+ if( !is_single_register(phi->ideal_reg()) ) {
+ value.map(preg_lo,phi);
+ regnd.map(preg_lo,phi);
+ }
+ }
+ }
+
+ // For all remaining instructions
+ for( j = phi_dex; j < b->_nodes.size(); j++ ) {
+ Node *n = b->_nodes[j];
+
+ if( n->outcnt() == 0 && // Dead?
+ n != C->top() && // (ignore TOP, it has no du info)
+ !n->is_Proj() ) { // fat-proj kills
+ j -= yank_if_dead(n,b,&value,&regnd);
+ continue;
+ }
+
+ // Improve reaching-def info. Occasionally post-alloc's liveness gives
+ // up (at loop backedges, because we aren't doing a full flow pass).
+ // The presence of a live use essentially asserts that the use's def is
+ // alive and well at the use (or else the allocator fubar'd). Take
+ // advantage of this info to set a reaching def for the use-reg.
+ uint k;
+ for( k = 1; k < n->req(); k++ ) {
+ Node *def = n->in(k); // n->in(k) is a USE; def is the DEF for this USE
+ guarantee(def != NULL, "no disconnected nodes at this point");
+ uint useidx = n2lidx(def); // useidx is the live range index for this USE
+
+ if( useidx ) {
+ OptoReg::Name ureg = lrgs(useidx).reg();
+ if( !value[ureg] ) {
+ int idx; // Skip occasional useless copy
+ while( (idx=def->is_Copy()) != 0 &&
+ def->in(idx) != NULL && // NULL should not happen
+ ureg == lrgs(n2lidx(def->in(idx))).reg() )
+ def = def->in(idx);
+ Node *valdef = skip_copies(def); // tighten up val through non-useless copies
+ value.map(ureg,valdef); // record improved reaching-def info
+ regnd.map(ureg, def);
+ // Record other half of doubles
+ OptoReg::Name ureg_lo = OptoReg::add(ureg,-1);
+ if( !is_single_register(def->ideal_reg()) &&
+ ( !RegMask::can_represent(ureg_lo) ||
+ lrgs(useidx).mask().Member(ureg_lo) ) && // Nearly always adjacent
+ !value[ureg_lo] ) {
+ value.map(ureg_lo,valdef); // record improved reaching-def info
+ regnd.map(ureg_lo, def);
+ }
+ }
+ }
+ }
+
+ const uint two_adr = n->is_Mach() ? n->as_Mach()->two_adr() : 0;
+
+ // Remove copies along input edges
+ for( k = 1; k < n->req(); k++ )
+ j -= elide_copy( n, k, b, value, regnd, two_adr!=k );
+
+ // Unallocated Nodes define no registers
+ uint lidx = n2lidx(n);
+ if( !lidx ) continue;
+
+ // Update the register defined by this instruction
+ OptoReg::Name nreg = lrgs(lidx).reg();
+ // Skip through all copies to the _value_ being defined.
+ // Do not change from int to pointer
+ Node *val = skip_copies(n);
+
+ uint n_ideal_reg = n->ideal_reg();
+ if( is_single_register(n_ideal_reg) ) {
+ // If Node 'n' does not change the value mapped by the register,
+ // then 'n' is a useless copy. Do not update the register->node
+ // mapping so 'n' will go dead.
+ if( value[nreg] != val ) {
+ if (eliminate_copy_of_constant(val, b, value, regnd, nreg, OptoReg::Bad)) {
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ } else {
+ // Update the mapping: record new Node defined by the register
+ regnd.map(nreg,n);
+ // Update mapping for defined *value*, which is the defined
+ // Node after skipping all copies.
+ value.map(nreg,val);
+ }
+ } else if( !may_be_copy_of_callee(n) && regnd[nreg]->outcnt() != 0 ) {
+ assert( n->is_Copy(), "" );
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ }
+ } else {
+ // If the value occupies a register pair, record same info
+ // in both registers.
+ OptoReg::Name nreg_lo = OptoReg::add(nreg,-1);
+ if( RegMask::can_represent(nreg_lo) && // Either a spill slot, or
+ !lrgs(lidx).mask().Member(nreg_lo) ) { // Nearly always adjacent
+ // Sparc occasionally has non-adjacent pairs.
+ // Find the actual other value
+ RegMask tmp = lrgs(lidx).mask();
+ tmp.Remove(nreg);
+ nreg_lo = tmp.find_first_elem();
+ }
+ if( value[nreg] != val || value[nreg_lo] != val ) {
+ if (eliminate_copy_of_constant(n, b, value, regnd, nreg, nreg_lo)) {
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ } else {
+ regnd.map(nreg , n );
+ regnd.map(nreg_lo, n );
+ value.map(nreg ,val);
+ value.map(nreg_lo,val);
+ }
+ } else if( !may_be_copy_of_callee(n) && regnd[nreg]->outcnt() != 0 ) {
+ assert( n->is_Copy(), "" );
+ n->replace_by(regnd[nreg]);
+ j -= yank_if_dead(n,b,&value,&regnd);
+ }
+ }
+
+ // Fat projections kill many registers
+ if( n_ideal_reg == MachProjNode::fat_proj ) {
+ RegMask rm = n->out_RegMask();
+ // wow, what an expensive iterator...
+ nreg = rm.find_first_elem();
+ while( OptoReg::is_valid(nreg)) {
+ rm.Remove(nreg);
+ value.map(nreg,n);
+ regnd.map(nreg,n);
+ nreg = rm.find_first_elem();
+ }
+ }
+
+ } // End of for all instructions in the block
+
+ } // End for all blocks
+}
diff --git a/src/share/vm/opto/reg_split.cpp b/src/share/vm/opto/reg_split.cpp
new file mode 100644
index 000000000..5101eb2e7
--- /dev/null
+++ b/src/share/vm/opto/reg_split.cpp
@@ -0,0 +1,1300 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_reg_split.cpp.incl"
+
+//------------------------------Split--------------------------------------
+// Walk the graph in RPO and for each lrg which spills, propagate reaching
+// definitions. During propagation, split the live range around regions of
+// High Register Pressure (HRP). If a Def is in a region of Low Register
+// Pressure (LRP), it will not get spilled until we encounter a region of
+// HRP between it and one of its uses. We will spill at the transition
+// point between LRP and HRP. Uses in the HRP region will use the spilled
+// Def. The first Use outside the HRP region will generate a SpillCopy to
+// hoist the live range back up into a register, and all subsequent uses
+// will use that new Def until another HRP region is encountered. Defs in
+// HRP regions will get trailing SpillCopies to push the LRG down into the
+// stack immediately.
+//
+// As a side effect, unlink from (hence make dead) coalesced copies.
+//
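+// Schematic (sketch):
+//   DEF (in LRP) ... spill inserted at the LRP->HRP transition ... uses inside
+//   the HRP region read the stacked copy ... the first use past the HRP region
+//   gets a SpillCopy hoisting the value back UP into a register, which then
+//   serves the remaining uses until the next HRP region.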
+
+static const char out_of_nodes[] = "out of nodes during split";
+
+//------------------------------get_spillcopy_wide-----------------------------
+// Get a SpillCopy node with wide-enough masks. Use the 'wide-mask', the
+// wide ideal-register spill-mask if possible. If the 'wide-mask' does
+// not cover the input (or output), use the input (or output) mask instead.
+Node *PhaseChaitin::get_spillcopy_wide( Node *def, Node *use, uint uidx ) {
+ // If ideal reg doesn't exist we've got a bad schedule happening
+ // that is forcing us to spill something that isn't spillable.
+ // Bail rather than abort
+ int ireg = def->ideal_reg();
+ if( ireg == 0 || ireg == Op_RegFlags ) {
+ C->record_method_not_compilable("attempted to spill a non-spillable item");
+ return NULL;
+ }
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ return NULL;
+ }
+ const RegMask *i_mask = &def->out_RegMask();
+ const RegMask *w_mask = C->matcher()->idealreg2spillmask[ireg];
+ const RegMask *o_mask = use ? &use->in_RegMask(uidx) : w_mask;
+ const RegMask *w_i_mask = w_mask->overlap( *i_mask ) ? w_mask : i_mask;
+ const RegMask *w_o_mask;
+
+ if( w_mask->overlap( *o_mask ) && // Overlap AND
+ ((ireg != Op_RegL && ireg != Op_RegD // Single use or aligned
+#ifdef _LP64
+ && ireg != Op_RegP
+#endif
+ ) || o_mask->is_aligned_Pairs()) ) {
+ // Don't come here for mis-aligned doubles
+ w_o_mask = w_mask;
+ } else { // wide ideal mask does not overlap with o_mask
+ // Mis-aligned doubles come here and XMM->FPR moves on x86.
+ w_o_mask = o_mask; // Must target desired registers
+ // Does the ideal-reg-mask overlap with o_mask? I.e., can I use
+ // a reg-reg move or do I need a trip across register classes
+ // (and thus through memory)?
+ if( !C->matcher()->idealreg2regmask[ireg]->overlap( *o_mask) && o_mask->is_UP() )
+ // Here we assume a trip through memory is required.
+ w_i_mask = &C->FIRST_STACK_mask();
+ }
+ return new (C) MachSpillCopyNode( def, *w_i_mask, *w_o_mask );
+}
+
+//------------------------------insert_proj------------------------------------
+// Insert the spill at the chosen location. Skip over any intervening Proj's or
+// Phis. Skip over a CatchNode and projs, inserting in the fall-through block
+// instead. Update high-pressure indices. Create a new live range.
+void PhaseChaitin::insert_proj( Block *b, uint i, Node *spill, uint maxlrg ) {
+ // Skip intervening ProjNodes. Do not insert between a ProjNode and
+ // its definer.
+ while( i < b->_nodes.size() &&
+ (b->_nodes[i]->is_Proj() ||
+ b->_nodes[i]->is_Phi() ) )
+ i++;
+
+ // Do not insert between a call and his Catch
+ if( b->_nodes[i]->is_Catch() ) {
+ // Put the instruction at the top of the fall-thru block.
+ // Find the fall-thru projection
+ while( 1 ) {
+ const CatchProjNode *cp = b->_nodes[++i]->as_CatchProj();
+ if( cp->_con == CatchProjNode::fall_through_index )
+ break;
+ }
+ int sidx = i - b->end_idx()-1;
+ b = b->_succs[sidx]; // Switch to successor block
+ i = 1; // Right at start of block
+ }
+
+ b->_nodes.insert(i,spill); // Insert node in block
+ _cfg._bbs.map(spill->_idx,b); // Update node->block mapping to reflect
+ // Adjust the point where we go hi-pressure
+ if( i <= b->_ihrp_index ) b->_ihrp_index++;
+ if( i <= b->_fhrp_index ) b->_fhrp_index++;
+
+ // Assign a new Live Range Number to the SpillCopy and grow
+ // the node->live range mapping.
+ new_lrg(spill,maxlrg);
+}
+
+//------------------------------split_DEF--------------------------------------
+// There are four categories of Split; UP/DOWN x DEF/USE
+// Only three of these really occur as DOWN/USE will always color
+// Any Split with a DEF cannot CISC-Spill now. Thus we need
+// two helper routines, one for Split DEFS (insert after instruction),
+// one for Split USES (insert before instruction). DEF insertion
+// happens inside Split, where the Leaveblock array is updated.
+uint PhaseChaitin::split_DEF( Node *def, Block *b, int loc, uint maxlrg, Node **Reachblock, Node **debug_defs, GrowableArray<uint> splits, int slidx ) {
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+ // If we are spilling the memory op for an implicit null check, at the
+ // null check location (ie - null check is in HRP block) we need to do
+ // the null-check first, then spill-down in the following block.
+ // (The implicit_null_check function ensures the use is also dominated
+ // by the branch-not-taken block.)
+ Node *be = b->end();
+ if( be->is_MachNullCheck() && be->in(1) == def && def == b->_nodes[loc] ) {
+ // Spill goes in the branch-not-taken block
+ b = b->_succs[b->_nodes[b->end_idx()+1]->Opcode() == Op_IfTrue];
+ loc = 0; // Just past the Region
+ }
+ assert( loc >= 0, "must insert past block head" );
+
+ // Get a def-side SpillCopy
+ Node *spill = get_spillcopy_wide(def,NULL,0);
+  // Did we fail to split? Then bail
+ if (!spill) {
+ return 0;
+ }
+
+ // Insert the spill at chosen location
+ insert_proj( b, loc+1, spill, maxlrg++);
+
+ // Insert new node into Reaches array
+ Reachblock[slidx] = spill;
+ // Update debug list of reaching down definitions by adding this one
+ debug_defs[slidx] = spill;
+
+ // return updated count of live ranges
+ return maxlrg;
+}
+
+//------------------------------split_USE--------------------------------------
+// Splits at uses can involve redefining the LRG, so no CISC Spilling there.
+// Debug uses want to know if def is already stack enabled.
+uint PhaseChaitin::split_USE( Node *def, Block *b, Node *use, uint useidx, uint maxlrg, bool def_down, bool cisc_sp, GrowableArray<uint> splits, int slidx ) {
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+
+ // Some setup stuff for handling debug node uses
+ JVMState* jvms = use->jvms();
+ uint debug_start = jvms ? jvms->debug_start() : 999999;
+ uint debug_end = jvms ? jvms->debug_end() : 999999;
+
+ //-------------------------------------------
+ // Check for use of debug info
+ if (useidx >= debug_start && useidx < debug_end) {
+    // Actually it's perfectly legal for constant debug info to appear,
+ // just unlikely. In this case the optimizer left a ConI of a 4
+ // as both inputs to a Phi with only a debug use. It's a single-def
+ // live range of a rematerializable value. The live range spills,
+ // rematerializes and now the ConI directly feeds into the debug info.
+ // assert(!def->is_Con(), "constant debug info already constructed directly");
+
+ // Special split handling for Debug Info
+ // If DEF is DOWN, just hook the edge and return
+ // If DEF is UP, Split it DOWN for this USE.
+ if( def->is_Mach() ) {
+ if( def_down ) {
+ // DEF is DOWN, so connect USE directly to the DEF
+ use->set_req(useidx, def);
+ } else {
+ // Block and index where the use occurs.
+ Block *b = _cfg._bbs[use->_idx];
+ // Put the clone just prior to use
+ int bindex = b->find_node(use);
+ // DEF is UP, so must copy it DOWN and hook in USE
+ // Insert SpillCopy before the USE, which uses DEF as its input,
+ // and defs a new live range, which is used by this node.
+ Node *spill = get_spillcopy_wide(def,use,useidx);
+ // did we fail to split?
+ if (!spill) {
+ // Bail
+ return 0;
+ }
+ // insert into basic block
+ insert_proj( b, bindex, spill, maxlrg++ );
+ // Use the new split
+ use->set_req(useidx,spill);
+ }
+ // No further split handling needed for this use
+ return maxlrg;
+ } // End special splitting for debug info live range
+ } // If debug info
+
+ // CISC-SPILLING
+ // Finally, check to see if USE is CISC-Spillable, and if so,
+ // gather_lrg_masks will add the flags bit to its mask, and
+ // no use side copy is needed. This frees up the live range
+ // register choices without causing copy coalescing, etc.
+ if( UseCISCSpill && cisc_sp ) {
+ int inp = use->cisc_operand();
+ if( inp != AdlcVMDeps::Not_cisc_spillable )
+ // Convert operand number to edge index number
+ inp = use->as_Mach()->operand_index(inp);
+ if( inp == (int)useidx ) {
+ use->set_req(useidx, def);
+#ifndef PRODUCT
+ if( TraceCISCSpill ) {
+ tty->print(" set_split: ");
+ use->dump();
+ }
+#endif
+ return maxlrg;
+ }
+ }
+
+ //-------------------------------------------
+ // Insert a Copy before the use
+
+ // Block and index where the use occurs.
+ int bindex;
+  // Phi input spill-copies belong at the end of the prior block
+ if( use->is_Phi() ) {
+ b = _cfg._bbs[b->pred(useidx)->_idx];
+ bindex = b->end_idx();
+ } else {
+ // Put the clone just prior to use
+ bindex = b->find_node(use);
+ }
+
+ Node *spill = get_spillcopy_wide( def, use, useidx );
+ if( !spill ) return 0; // Bailed out
+ // Insert SpillCopy before the USE, which uses the reaching DEF as
+ // its input, and defs a new live range, which is used by this node.
+ insert_proj( b, bindex, spill, maxlrg++ );
+ // Use the spill/clone
+ use->set_req(useidx,spill);
+
+ // return updated live range count
+ return maxlrg;
+}
+
+//------------------------------split_Rematerialize----------------------------
+// Clone a local copy of the def.
+Node *PhaseChaitin::split_Rematerialize( Node *def, Block *b, uint insidx, uint &maxlrg, GrowableArray<uint> splits, int slidx, uint *lrg2reach, Node **Reachblock, bool walkThru ) {
+ // The input live ranges will be stretched to the site of the new
+ // instruction. They might be stretched past a def and will thus
+ // have the old and new values of the same live range alive at the
+ // same time - a definite no-no. Split out private copies of
+ // the inputs.
+ if( def->req() > 1 ) {
+ for( uint i = 1; i < def->req(); i++ ) {
+ Node *in = def->in(i);
+ // Check for single-def (LRG cannot be redefined)
+ uint lidx = n2lidx(in);
+ if( lidx >= _maxlrg ) continue; // Value is a recent spill-copy
+ if( lrgs(lidx)._def != NodeSentinel ) continue;
+
+ Block *b_def = _cfg._bbs[def->_idx];
+ int idx_def = b_def->find_node(def);
+ Node *in_spill = get_spillcopy_wide( in, def, i );
+ if( !in_spill ) return 0; // Bailed out
+ insert_proj(b_def,idx_def,in_spill,maxlrg++);
+ if( b_def == b )
+ insidx++;
+ def->set_req(i,in_spill);
+ }
+ }
+
+ Node *spill = def->clone();
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ // Check when generating nodes
+ return 0;
+ }
+
+ // See if any inputs are currently being spilled, and take the
+ // latest copy of spilled inputs.
+ if( spill->req() > 1 ) {
+ for( uint i = 1; i < spill->req(); i++ ) {
+ Node *in = spill->in(i);
+ uint lidx = Find_id(in);
+
+ // Walk backwards thru spill copy node intermediates
+ if( walkThru )
+ while ( in->is_SpillCopy() && lidx >= _maxlrg ) {
+ in = in->in(1);
+ lidx = Find_id(in);
+ }
+
+ if( lidx < _maxlrg && lrgs(lidx).reg() >= LRG::SPILL_REG ) {
+ Node *rdef = Reachblock[lrg2reach[lidx]];
+ if( rdef ) spill->set_req(i,rdef);
+ }
+ }
+ }
+
+
+ assert( spill->out_RegMask().is_UP(), "rematerialize to a reg" );
+ // Rematerialized op is def->spilled+1
+ set_was_spilled(spill);
+ if( _spilled_once.test(def->_idx) )
+ set_was_spilled(spill);
+
+ insert_proj( b, insidx, spill, maxlrg++ );
+#ifdef ASSERT
+ // Increment the counter for this lrg
+ splits.at_put(slidx, splits.at(slidx)+1);
+#endif
+ // See if the cloned def kills any flags, and copy those kills as well
+ uint i = insidx+1;
+ if( clone_projs( b, i, def, spill, maxlrg ) ) {
+ // Adjust the point where we go hi-pressure
+ if( i <= b->_ihrp_index ) b->_ihrp_index++;
+ if( i <= b->_fhrp_index ) b->_fhrp_index++;
+ }
+
+ return spill;
+}
+
+//------------------------------is_high_pressure-------------------------------
+// Function to compute whether or not this live range is "high pressure"
+// in this block - whether it spills eagerly or not.
+bool PhaseChaitin::is_high_pressure( Block *b, LRG *lrg, uint insidx ) {
+ if( lrg->_was_spilled1 ) return true;
+ // Forced spilling due to conflict? Then split only at binding uses
+ // or defs, not for supposed capacity problems.
+ // CNC - Turned off 7/8/99, causes too much spilling
+ // if( lrg->_is_bound ) return false;
+
+ // Not yet reached the high-pressure cutoff point, so low pressure
+ uint hrp_idx = lrg->_is_float ? b->_fhrp_index : b->_ihrp_index;
+ if( insidx < hrp_idx ) return false;
+ // Register pressure for the block as a whole depends on reg class
+ int block_pres = lrg->_is_float ? b->_freg_pressure : b->_reg_pressure;
+ // Bound live ranges will split at the binding points first;
+ // Intermediate splits should assume the live range's register set
+ // got "freed up" and that num_regs will become INT_PRESSURE.
+ int bound_pres = lrg->_is_float ? FLOATPRESSURE : INTPRESSURE;
+ // Effective register pressure limit.
+ int lrg_pres = (lrg->get_invalid_mask_size() > lrg->num_regs())
+ ? (lrg->get_invalid_mask_size() >> (lrg->num_regs()-1)) : bound_pres;
+ // High pressure if block pressure requires more register freedom
+ // than live range has.
+ return block_pres >= lrg_pres;
+}
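
A minimal standalone paraphrase of the final comparison above (the _was_spilled1 and HRP-index early-outs are omitted, and every name here is invented for illustration rather than taken from the HotSpot API):

    #include <cassert>

    // Paraphrase of the closing test in PhaseChaitin::is_high_pressure():
    // a live range counts as "high pressure" once the block's pressure for
    // its register class reaches the live range's effective limit.
    static bool is_high_pressure_sketch(int block_pressure,    // reg or freg pressure of the block
                                        int invalid_mask_size, // lrg->get_invalid_mask_size()
                                        int num_regs,          // lrg->num_regs()
                                        int bound_pressure) {  // INTPRESSURE or FLOATPRESSURE
      int lrg_pres = (invalid_mask_size > num_regs)
          ? (invalid_mask_size >> (num_regs - 1))
          : bound_pressure;
      return block_pressure >= lrg_pres;
    }

    int main() {
      assert(!is_high_pressure_sketch(5, 1, 1, 6)); // below the class-wide limit
      assert( is_high_pressure_sketch(6, 1, 1, 6)); // at the limit
      assert( is_high_pressure_sketch(4, 8, 2, 6)); // many forbidden regs lower the limit to 8>>1 = 4
      return 0;
    }
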
+
+
+//------------------------------prompt_use---------------------------------
+// True if lidx is used before any real register is def'd in the block
+bool PhaseChaitin::prompt_use( Block *b, uint lidx ) {
+ if( lrgs(lidx)._was_spilled2 ) return false;
+
+ // Scan block for 1st use.
+ for( uint i = 1; i <= b->end_idx(); i++ ) {
+ Node *n = b->_nodes[i];
+ // Ignore PHI use, these can be up or down
+ if( n->is_Phi() ) continue;
+ for( uint j = 1; j < n->req(); j++ )
+ if( Find_id(n->in(j)) == lidx )
+ return true; // Found 1st use!
+ if( n->out_RegMask().is_NotEmpty() ) return false;
+ }
+ return false;
+}
+
+//------------------------------Split--------------------------------------
+//----------Split Routine----------
+// ***** NEW SPLITTING HEURISTIC *****
+// DEFS: If the DEF is in a High Register Pressure(HRP) Block, split there.
+// Else, no split unless there is a HRP block between a DEF and
+// one of its uses, and then split at the HRP block.
+//
+// USES: If USE is in HRP, split at use to leave main LRG on stack.
+// Else, hoist LRG back up to register only (ie - split is also DEF)
+// We will compute a new maxlrg as we go
+uint PhaseChaitin::Split( uint maxlrg ) {
+ NOT_PRODUCT( Compile::TracePhase t3("regAllocSplit", &_t_regAllocSplit, TimeCompiler); )
+
+ uint bidx, pidx, slidx, insidx, inpidx, twoidx;
+ uint non_phi = 1, spill_cnt = 0;
+ Node **Reachblock;
+ Node *n1, *n2, *n3;
+ Node_List *defs,*phis;
+ bool *UPblock;
+ bool u1, u2, u3;
+ Block *b, *pred;
+ PhiNode *phi;
+ GrowableArray<uint> lidxs;
+
+ // Array of counters to count splits per live range
+ GrowableArray<uint> splits;
+
+ //----------Setup Code----------
+ // Create a convenient mapping from lrg numbers to reaches/leaves indices
+ uint *lrg2reach = NEW_RESOURCE_ARRAY( uint, _maxlrg );
+ // Keep track of DEFS & Phis for later passes
+ defs = new Node_List();
+ phis = new Node_List();
+ // Gather info on which LRG's are spilling, and build maps
+ for( bidx = 1; bidx < _maxlrg; bidx++ ) {
+ if( lrgs(bidx).alive() && lrgs(bidx).reg() >= LRG::SPILL_REG ) {
+ assert(!lrgs(bidx).mask().is_AllStack(),"AllStack should color");
+ lrg2reach[bidx] = spill_cnt;
+ spill_cnt++;
+ lidxs.append(bidx);
+#ifdef ASSERT
+ // Initialize the split counts to zero
+ splits.append(0);
+#endif
+#ifndef PRODUCT
+ if( PrintOpto && WizardMode && lrgs(bidx)._was_spilled1 )
+ tty->print_cr("Warning, 2nd spill of L%d",bidx);
+#endif
+ }
+ }
+
+ // Create side arrays for propagating reaching defs info.
+ // Each block needs a node pointer for each spilling live range for the
+ // Def which is live into the block. Phi nodes handle multiple input
+ // Defs by querying the output of their predecessor blocks and resolving
+ // them to a single Def at the phi. The pointer is updated for each
+ // Def in the block, and then becomes the output for the block when
+ // processing of the block is complete. We also need to track whether
+ // a Def is UP or DOWN. UP means that it should get a register (ie -
+ // it is always in LRP regions), and DOWN means that it is probably
+ // on the stack (ie - it crosses HRP regions).
+ Node ***Reaches = NEW_RESOURCE_ARRAY( Node**, _cfg._num_blocks+1 );
+ bool **UP = NEW_RESOURCE_ARRAY( bool*, _cfg._num_blocks+1 );
+ Node **debug_defs = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
+ VectorSet **UP_entry= NEW_RESOURCE_ARRAY( VectorSet*, spill_cnt );
+
+ // Initialize Reaches & UP
+ for( bidx = 0; bidx < _cfg._num_blocks+1; bidx++ ) {
+ Reaches[bidx] = NEW_RESOURCE_ARRAY( Node*, spill_cnt );
+ UP[bidx] = NEW_RESOURCE_ARRAY( bool, spill_cnt );
+ Node **Reachblock = Reaches[bidx];
+ bool *UPblock = UP[bidx];
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ UPblock[slidx] = true; // Assume they start in registers
+ Reachblock[slidx] = NULL; // Assume that no def is present
+ }
+ }
+
+ // Initialize to array of empty vectorsets
+ for( slidx = 0; slidx < spill_cnt; slidx++ )
+ UP_entry[slidx] = new VectorSet(Thread::current()->resource_area());
+
+ //----------PASS 1----------
+ //----------Propagation & Node Insertion Code----------
+ // Walk the Blocks in RPO for DEF & USE info
+ for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
+
+ if (C->check_node_count(spill_cnt, out_of_nodes)) {
+ return 0;
+ }
+
+ b = _cfg._blocks[bidx];
+ // Reaches & UP arrays for this block
+ Reachblock = Reaches[b->_pre_order];
+ UPblock = UP[b->_pre_order];
+ // Reset counter of start of non-Phi nodes in block
+ non_phi = 1;
+ //----------Block Entry Handling----------
+ // Check for need to insert a new phi
+ // Cycle through this block's predecessors, collecting Reaches
+ // info for each spilled LRG. If they are identical, no phi is
+ // needed. If they differ, check for a phi, and insert if missing,
+ // or update edges if present. Set current block's Reaches set to
+ // be either the phi's or the reaching def, as appropriate.
+ // If no Phi is needed, check if the LRG needs to spill on entry
+ // to the block due to HRP.
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ // Grab the live range number
+ uint lidx = lidxs.at(slidx);
+ // Do not bother splitting or putting in Phis for single-def
+ // rematerialized live ranges. This happens a lot to constants
+ // with long live ranges.
+ if( lrgs(lidx)._def != NodeSentinel &&
+ lrgs(lidx)._def->rematerialize() ) {
+ // reset the Reaches & UP entries
+ Reachblock[slidx] = lrgs(lidx)._def;
+ UPblock[slidx] = true;
+ // Record following instruction in case 'n' rematerializes and
+ // kills flags
+ Block *pred1 = _cfg._bbs[b->pred(1)->_idx];
+ continue;
+ }
+
+ // Initialize needs_phi and needs_split
+ bool needs_phi = false;
+ bool needs_split = false;
+ // Walk the predecessor blocks to check inputs for that live range
+ // Grab predecessor block header
+ n1 = b->pred(1);
+ // Grab the appropriate reaching def info for inpidx
+ pred = _cfg._bbs[n1->_idx];
+ pidx = pred->_pre_order;
+ Node **Ltmp = Reaches[pidx];
+ bool *Utmp = UP[pidx];
+ n1 = Ltmp[slidx];
+ u1 = Utmp[slidx];
+ // Initialize node for saving type info
+ n3 = n1;
+ u3 = u1;
+
+ // Compare inputs to see if a Phi is needed
+ for( inpidx = 2; inpidx < b->num_preds(); inpidx++ ) {
+ // Grab predecessor block headers
+ n2 = b->pred(inpidx);
+ // Grab the appropriate reaching def info for inpidx
+ pred = _cfg._bbs[n2->_idx];
+ pidx = pred->_pre_order;
+ Ltmp = Reaches[pidx];
+ Utmp = UP[pidx];
+ n2 = Ltmp[slidx];
+ u2 = Utmp[slidx];
+ // For each LRG, decide if a phi is necessary
+ if( n1 != n2 ) {
+ needs_phi = true;
+ }
+ // See if the phi has mismatched inputs, UP vs. DOWN
+ if( n1 && n2 && (u1 != u2) ) {
+ needs_split = true;
+ }
+ // Move n2/u2 to n1/u1 for next iteration
+ n1 = n2;
+ u1 = u2;
+ // Preserve a non-NULL predecessor for later type referencing
+ if( (n3 == NULL) && (n2 != NULL) ){
+ n3 = n2;
+ u3 = u2;
+ }
+ } // End for all potential Phi inputs
+
+ // If a phi is needed, check for it
+ if( needs_phi ) {
+ // check block for appropriate phinode & update edges
+ for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
+ n1 = b->_nodes[insidx];
+ // bail if this is not a phi
+ phi = n1->is_Phi() ? n1->as_Phi() : NULL;
+ if( phi == NULL ) {
+ // Keep track of index of first non-PhiNode instruction in block
+ non_phi = insidx;
+ // break out of the for loop as we have handled all phi nodes
+ break;
+ }
+ // must be looking at a phi
+ if( Find_id(n1) == lidxs.at(slidx) ) {
+ // found the necessary phi
+ needs_phi = false;
+ // initialize the Reaches entry for this LRG
+ Reachblock[slidx] = phi;
+ break;
+ } // end if found correct phi
+ } // end for all phi's
+ // add new phinode if one not already found
+ if( needs_phi ) {
+ // create a new phi node and insert it into the block
+ // type is taken from left over pointer to a predecessor
+ assert(n3,"No non-NULL reaching DEF for a Phi");
+ phi = new (C, b->num_preds()) PhiNode(b->head(), n3->bottom_type());
+ // initialize the Reaches entry for this LRG
+ Reachblock[slidx] = phi;
+
+ // add node to block & node_to_block mapping
+ insert_proj( b, insidx++, phi, maxlrg++ );
+ non_phi++;
+ // Reset new phi's mapping to be the spilling live range
+ _names.map(phi->_idx, lidx);
+ assert(Find_id(phi) == lidx,"Bad update on Union-Find mapping");
+ } // end if not found correct phi
+ // Here you have either found or created the Phi, so record it
+ assert(phi != NULL,"Must have a Phi Node here");
+ phis->push(phi);
+ // PhiNodes should either force the LRG UP or DOWN depending
+ // on its inputs and the register pressure in the Phi's block.
+ UPblock[slidx] = true; // Assume new DEF is UP
+ // If entering a high-pressure area with no immediate use,
+ // assume Phi is DOWN
+ if( is_high_pressure( b, &lrgs(lidx), b->end_idx()) && !prompt_use(b,lidx) )
+ UPblock[slidx] = false;
+ // If we are not split up/down and all inputs are down, then we
+ // are down
+ if( !needs_split && !u3 )
+ UPblock[slidx] = false;
+ } // end if phi is needed
+
+ // Do not need a phi, so grab the reaching DEF
+ else {
+ // Grab predecessor block header
+ n1 = b->pred(1);
+ // Grab the appropriate reaching def info for k
+ pred = _cfg._bbs[n1->_idx];
+ pidx = pred->_pre_order;
+ Node **Ltmp = Reaches[pidx];
+ bool *Utmp = UP[pidx];
+ // reset the Reaches & UP entries
+ Reachblock[slidx] = Ltmp[slidx];
+ UPblock[slidx] = Utmp[slidx];
+ } // end else no Phi is needed
+ } // end for all spilling live ranges
+ // DEBUG
+#ifndef PRODUCT
+ if(trace_spilling()) {
+ tty->print("/`\nBlock %d: ", b->_pre_order);
+ tty->print("Reaching Definitions after Phi handling\n");
+ for( uint x = 0; x < spill_cnt; x++ ) {
+ tty->print("Spill Idx %d: UP %d: Node\n",x,UPblock[x]);
+ if( Reachblock[x] )
+ Reachblock[x]->dump();
+ else
+ tty->print("Undefined\n");
+ }
+ }
+#endif
+
+ //----------Non-Phi Node Splitting----------
+ // Since phi-nodes have now been handled, the Reachblock array for this
+ // block is initialized with the correct starting value for the defs which
+ // reach non-phi instructions in this block. Thus, process non-phi
+ // instructions normally, inserting SpillCopy nodes for all spill
+ // locations.
+
+ // Memoize any DOWN reaching definitions for use as DEBUG info
+ for( insidx = 0; insidx < spill_cnt; insidx++ ) {
+ debug_defs[insidx] = (UPblock[insidx]) ? NULL : Reachblock[insidx];
+ if( UPblock[insidx] ) // Memoize UP decision at block start
+ UP_entry[insidx]->set( b->_pre_order );
+ }
+
+ //----------Walk Instructions in the Block and Split----------
+ // For all non-phi instructions in the block
+ for( insidx = 1; insidx <= b->end_idx(); insidx++ ) {
+ Node *n = b->_nodes[insidx];
+ // Find the defining Node's live range index
+ uint defidx = Find_id(n);
+ uint cnt = n->req();
+
+ if( n->is_Phi() ) {
+ // Skip phi nodes after removing dead copies.
+ if( defidx < _maxlrg ) {
+ // Check for useless Phis. These appear if we spill, then
+ // coalesce away copies. Don't touch Phis in spilling live
+ // ranges; they are busy getting modified in this pass.
+ if( lrgs(defidx).reg() < LRG::SPILL_REG ) {
+ uint i;
+ Node *u = NULL;
+ // Look for the Phi merging 2 unique inputs
+ for( i = 1; i < cnt; i++ ) {
+ // Ignore repeats and self
+ if( n->in(i) != u && n->in(i) != n ) {
+ // Found a unique input
+ if( u != NULL ) // If it's the 2nd, bail out
+ break;
+ u = n->in(i); // Else record it
+ }
+ }
+ assert( u, "at least 1 valid input expected" );
+ if( i >= cnt ) { // Didn't find 2+ unique inputs?
+ n->replace_by(u); // Then replace with unique input
+ n->disconnect_inputs(NULL);
+ b->_nodes.remove(insidx);
+ insidx--;
+ b->_ihrp_index--;
+ b->_fhrp_index--;
+ }
+ }
+ }
+ continue;
+ }
+ assert( insidx > b->_ihrp_index ||
+ (b->_reg_pressure < (uint)INTPRESSURE) ||
+ b->_ihrp_index > 4000000 ||
+ b->_ihrp_index >= b->end_idx() ||
+ !b->_nodes[b->_ihrp_index]->is_Proj(), "" );
+ assert( insidx > b->_fhrp_index ||
+ (b->_freg_pressure < (uint)FLOATPRESSURE) ||
+ b->_fhrp_index > 4000000 ||
+ b->_fhrp_index >= b->end_idx() ||
+ !b->_nodes[b->_fhrp_index]->is_Proj(), "" );
+
+ // ********** Handle Crossing HRP Boundary **********
+ if( (insidx == b->_ihrp_index) || (insidx == b->_fhrp_index) ) {
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ // Check for need to split at HRP boundary - split if UP
+ n1 = Reachblock[slidx];
+ // bail out if no reaching DEF
+ if( n1 == NULL ) continue;
+ // bail out if live range is 'isolated' around inner loop
+ uint lidx = lidxs.at(slidx);
+ // If live range is currently UP
+ if( UPblock[slidx] ) {
+ // set location to insert spills at
+ // SPLIT DOWN HERE - NO CISC SPILL
+ if( is_high_pressure( b, &lrgs(lidx), insidx ) &&
+ !n1->rematerialize() ) {
+ // If there is already a valid stack definition available, use it
+ if( debug_defs[slidx] != NULL ) {
+ Reachblock[slidx] = debug_defs[slidx];
+ }
+ else {
+ // Insert point is just past last use or def in the block
+ int insert_point = insidx-1;
+ while( insert_point > 0 ) {
+ Node *n = b->_nodes[insert_point];
+ // Hit top of block? Quit going backwards
+ if( n->is_Phi() ) break;
+ // Found a def? Better split after it.
+ if( n2lidx(n) == lidx ) break;
+ // Look for a use
+ uint i;
+ for( i = 1; i < n->req(); i++ )
+ if( n2lidx(n->in(i)) == lidx )
+ break;
+ // Found a use? Better split after it.
+ if( i < n->req() ) break;
+ insert_point--;
+ }
+ maxlrg = split_DEF( n1, b, insert_point, maxlrg, Reachblock, debug_defs, splits, slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++;
+ }
+ // This is a new DEF, so update UP
+ UPblock[slidx] = false;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew Split DOWN DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,false);
+ n1->dump();
+ }
+#endif
+ }
+ } // end if LRG is UP
+ } // end for all spilling live ranges
+ assert( b->_nodes[insidx] == n, "got insidx set incorrectly" );
+ } // end if crossing HRP Boundary
+
+ // If the LRG index is oob, then this is a new spillcopy, skip it.
+ if( defidx >= _maxlrg ) {
+ continue;
+ }
+ LRG &deflrg = lrgs(defidx);
+ uint copyidx = n->is_Copy();
+ // Remove coalesced copy from CFG
+ if( copyidx && defidx == n2lidx(n->in(copyidx)) ) {
+ n->replace_by( n->in(copyidx) );
+ n->set_req( copyidx, NULL );
+ b->_nodes.remove(insidx--);
+ b->_ihrp_index--; // Adjust the point where we go hi-pressure
+ b->_fhrp_index--;
+ continue;
+ }
+
+#define DERIVED 0
+
+ // ********** Handle USES **********
+ bool nullcheck = false;
+ // Implicit null checks never use the spilled value
+ if( n->is_MachNullCheck() )
+ nullcheck = true;
+ if( !nullcheck ) {
+ // Search all inputs for a Spill-USE
+ JVMState* jvms = n->jvms();
+ uint oopoff = jvms ? jvms->oopoff() : cnt;
+ uint old_last = cnt - 1;
+ for( inpidx = 1; inpidx < cnt; inpidx++ ) {
+ // Derived/base pairs may be added to our inputs during this loop.
+ // If inpidx > old_last, then one of these new inputs is being
+ // handled. Skip the derived part of the pair, but process
+ // the base like any other input.
+ if( inpidx > old_last && ((inpidx - oopoff) & 1) == DERIVED ) {
+ continue; // skip derived_debug added below
+ }
+ // Get lidx of input
+ uint useidx = Find_id(n->in(inpidx));
+ // Not a brand-new split, and it is a spill use
+ if( useidx < _maxlrg && lrgs(useidx).reg() >= LRG::SPILL_REG ) {
+ // Check for valid reaching DEF
+ slidx = lrg2reach[useidx];
+ Node *def = Reachblock[slidx];
+ assert( def != NULL, "Using Undefined Value in Split()\n");
+
+ // (+++) %%%% remove this in favor of pre-pass in matcher.cpp
+ // monitor references do not care where they live, so just hook
+ if ( jvms && jvms->is_monitor_use(inpidx) ) {
+ // The effect of this clone is to drop the node out of the block,
+ // so that the allocator does not see it anymore, and therefore
+ // does not attempt to assign it a register.
+ def = def->clone();
+ _names.extend(def->_idx,0);
+ _cfg._bbs.map(def->_idx,b);
+ n->set_req(inpidx, def);
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) {
+ return 0;
+ }
+ continue;
+ }
+
+ // Rematerializable? Then clone def at use site instead
+ // of store/load
+ if( def->rematerialize() ) {
+ int old_size = b->_nodes.size();
+ def = split_Rematerialize( def, b, insidx, maxlrg, splits, slidx, lrg2reach, Reachblock, true );
+ if( !def ) return 0; // Bail out
+ insidx += b->_nodes.size()-old_size;
+ }
+
+ MachNode *mach = n->is_Mach() ? n->as_Mach() : NULL;
+ // Base pointers and oopmap references do not care where they live.
+ if ((inpidx >= oopoff) ||
+ (mach && mach->ideal_Opcode() == Op_AddP && inpidx == AddPNode::Base)) {
+ if (def->rematerialize() && lrgs(useidx)._was_spilled2) {
+ // This def has been rematerialized a couple of times without
+ // progress. It doesn't care if it lives UP or DOWN, so
+ // spill it down now.
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false,splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ } else {
+ // Just hook the def edge
+ n->set_req(inpidx, def);
+ }
+
+ if (inpidx >= oopoff) {
+ // After oopoff, we have derived/base pairs. We must mention all
+ // derived pointers here as derived/base pairs for GC. If the
+ // derived value is spilling and we have a copy both in Reachblock
+ // (called here 'def') and debug_defs[slidx] we need to mention
+ // both in derived/base pairs or kill one.
+ Node *derived_debug = debug_defs[slidx];
+ if( ((inpidx - oopoff) & 1) == DERIVED && // derived vs base?
+ mach && mach->ideal_Opcode() != Op_Halt &&
+ derived_debug != NULL &&
+ derived_debug != def ) { // Actual 2nd value appears
+ // We have already set 'def' as a derived value.
+ // Also set debug_defs[slidx] as a derived value.
+ uint k;
+ for( k = oopoff; k < cnt; k += 2 )
+ if( n->in(k) == derived_debug )
+ break; // Found an instance of debug derived
+ if( k == cnt ) {// No instance of debug_defs[slidx]
+ // Add a derived/base pair to cover the debug info.
+ // We have to process the added base later since it is not
+ // handled yet at this point but skip derived part.
+ assert(((n->req() - oopoff) & 1) == DERIVED,
+ "must match skip condition above");
+ n->add_req( derived_debug ); // this will be skipped above
+ n->add_req( n->in(inpidx+1) ); // this will be processed
+ // Increment cnt to handle added input edges on
+ // subsequent iterations.
+ cnt += 2;
+ }
+ }
+ }
+ continue;
+ }
+ // Special logic for DEBUG info
+ if( jvms && b->_freq > BLOCK_FREQUENCY(0.5) ) {
+ uint debug_start = jvms->debug_start();
+ // If this is debug info use & there is a reaching DOWN def
+ if ((debug_start <= inpidx) && (debug_defs[slidx] != NULL)) {
+ assert(inpidx < oopoff, "handle only debug info here");
+ // Just hook it in & move on
+ n->set_req(inpidx, debug_defs[slidx]);
+ // (Note that this can make two sides of a split live at the
+ // same time: The debug def on stack, and another def in a
+ // register. The GC needs to know about both of them, but any
+ // derived pointers after oopoff will refer to only one of the
+ // two defs and the GC would therefore miss the other. Thus
+ // this hack is only allowed for debug info which is Java state
+ // and therefore never a derived pointer.)
+ continue;
+ }
+ }
+ // Grab register mask info
+ const RegMask &dmask = def->out_RegMask();
+ const RegMask &umask = n->in_RegMask(inpidx);
+
+ assert(inpidx < oopoff, "cannot use-split oop map info");
+
+ bool dup = UPblock[slidx];
+ bool uup = umask.is_UP();
+
+ // Need special logic to handle bound USES. Insert a split at this
+ // bound use if we can't rematerialize the def, or if we need the
+ // split to form a misaligned pair.
+ if( !umask.is_AllStack() &&
+ (int)umask.Size() <= lrgs(useidx).num_regs() &&
+ (!def->rematerialize() ||
+ umask.is_misaligned_Pair())) {
+ // These need a Split regardless of overlap or pressure
+ // SPLIT - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,dup,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ continue;
+ }
+ // Here is the logic chart which describes USE Splitting:
+ // 0 = false or DOWN, 1 = true or UP
+ //
+ // Overlap | DEF | USE | Action
+ //-------------------------------------------------------
+ // 0 | 0 | 0 | Copy - mem -> mem
+ // 0 | 0 | 1 | Split-UP - Check HRP
+ // 0 | 1 | 0 | Split-DOWN - Debug Info?
+ // 0 | 1 | 1 | Copy - reg -> reg
+ // 1 | 0 | 0 | Reset Input Edge (no Split)
+ // 1 | 0 | 1 | Split-UP - Check HRP
+ // 1 | 1 | 0 | Split-DOWN - Debug Info?
+ // 1 | 1 | 1 | Reset Input Edge (no Split)
+ //
+ // So, if (dup == uup), then overlap test determines action,
+ // with true being no split, and false being copy. Else,
+ // if DEF is DOWN, Split-UP, and check HRP to decide on
+ // resetting DEF. Finally if DEF is UP, Split-DOWN, with
+ // special handling for Debug Info.
+ if( dup == uup ) {
+ if( dmask.overlap(umask) ) {
+ // Both are either up or down, and there is overlap, No Split
+ n->set_req(inpidx, def);
+ }
+ else { // Both are either up or down, and there is no overlap
+ if( dup ) { // If UP, reg->reg copy
+ // COPY ACROSS HERE - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ }
+ else { // DOWN, mem->mem copy
+ // COPY UP & DOWN HERE - NO DEF - NO CISC SPILL
+ // First Split-UP to move value into Register
+ uint def_ideal = def->ideal_reg();
+ const RegMask* tmp_rm = Matcher::idealreg2regmask[def_ideal];
+ Node *spill = new (C) MachSpillCopyNode(def, dmask, *tmp_rm);
+ insert_proj( b, insidx, spill, maxlrg );
+ // Then Split-DOWN as if previous Split was DEF
+ maxlrg = split_USE(spill,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx += 2; // Reset iterator to skip USE side splits
+ }
+ } // End else no overlap
+ } // End if dup == uup
+ // dup != uup, so check dup for direction of Split
+ else {
+ if( dup ) { // If UP, Split-DOWN and check Debug Info
+ // If this node is already a SpillCopy, just patch the edge
+ // except the case of spilling to stack.
+ if( n->is_SpillCopy() ) {
+ RegMask tmp_rm(umask);
+ tmp_rm.SUBTRACT(Matcher::STACK_ONLY_mask);
+ if( dmask.overlap(tmp_rm) ) {
+ if( def != n->in(inpidx) ) {
+ n->set_req(inpidx, def);
+ }
+ continue;
+ }
+ }
+ // COPY DOWN HERE - NO DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,false,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ // Check for debug-info split. Capture it for later
+ // debug splits of the same value
+ if (jvms && jvms->debug_start() <= inpidx && inpidx < oopoff)
+ debug_defs[slidx] = n->in(inpidx);
+
+ }
+ else { // DOWN, Split-UP and check register pressure
+ if( is_high_pressure( b, &lrgs(useidx), insidx ) ) {
+ // COPY UP HERE - NO DEF - CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,true, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ insidx++; // Reset iterator to skip USE side split
+ } else { // LRP
+ // COPY UP HERE - WITH DEF - NO CISC SPILL
+ maxlrg = split_USE(def,b,n,inpidx,maxlrg,true,false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ // Flag this lift-up in a low-pressure block as
+ // already-spilled, so if it spills again it will
+ // spill hard (instead of not spilling hard and
+ // coalescing away).
+ set_was_spilled(n->in(inpidx));
+ // Since this is a new DEF, update Reachblock & UP
+ Reachblock[slidx] = n->in(inpidx);
+ UPblock[slidx] = true;
+ insidx++; // Reset iterator to skip USE side split
+ }
+ } // End else DOWN
+ } // End dup != uup
+ } // End if Spill USE
+ } // End For All Inputs
+ } // End If not nullcheck
+
+ // ********** Handle DEFS **********
+ // DEFS either Split DOWN in HRP regions or when the LRG is bound, or
+ // just reset the Reaches info in LRP regions. DEFS must always update
+ // UP info.
+ if( deflrg.reg() >= LRG::SPILL_REG ) { // Spilled?
+ uint slidx = lrg2reach[defidx];
+ // Add to defs list for later assignment of new live range number
+ defs->push(n);
+ // Set a flag on the Node indicating it has already spilled.
+ // Only do it for capacity spills not conflict spills.
+ if( !deflrg._direct_conflict )
+ set_was_spilled(n);
+ assert(!n->is_Phi(),"Cannot insert Phi into DEFS list");
+ // Grab UP info for DEF
+ const RegMask &dmask = n->out_RegMask();
+ bool defup = dmask.is_UP();
+ // Only split at Def if this is a HRP block or bound (and spilled once)
+ if( !n->rematerialize() &&
+ (((dmask.is_bound1() || dmask.is_bound2() || dmask.is_misaligned_Pair()) &&
+ (deflrg._direct_conflict || deflrg._must_spill)) ||
+ // Check for LRG being up in a register and we are inside a high
+ // pressure area. Spill it down immediately.
+ (defup && is_high_pressure(b,&deflrg,insidx))) ) {
+ assert( !n->rematerialize(), "" );
+ assert( !n->is_SpillCopy(), "" );
+ // Do a split at the def site.
+ maxlrg = split_DEF( n, b, insidx, maxlrg, Reachblock, debug_defs, splits, slidx );
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ // Split DEF's Down
+ UPblock[slidx] = 0;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew Split DOWN DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,false);
+ n->dump();
+ }
+#endif
+ }
+ else { // Neither bound nor HRP, must be LRP
+ // otherwise, just record the def
+ Reachblock[slidx] = n;
+ // UP should come from the outRegmask() of the DEF
+ UPblock[slidx] = defup;
+ // Update debug list of reaching down definitions, kill if DEF is UP
+ debug_defs[slidx] = defup ? NULL : n;
+#ifndef PRODUCT
+ // DEBUG
+ if( trace_spilling() ) {
+ tty->print("\nNew DEF of Spill Idx ");
+ tty->print("%d, UP %d:\n",slidx,defup);
+ n->dump();
+ }
+#endif
+ } // End else LRP
+ } // End if spill def
+
+ // ********** Split Left Over Mem-Mem Moves **********
+ // Check for mem-mem copies and split them now. Do not do this
+ // to copies about to be spilled; they will be Split shortly.
+ if( copyidx ) {
+ Node *use = n->in(copyidx);
+ uint useidx = Find_id(use);
+ if( useidx < _maxlrg && // This is not a new split
+ OptoReg::is_stack(deflrg.reg()) &&
+ deflrg.reg() < LRG::SPILL_REG ) { // And DEF is from stack
+ LRG &uselrg = lrgs(useidx);
+ if( OptoReg::is_stack(uselrg.reg()) &&
+ uselrg.reg() < LRG::SPILL_REG && // USE is from stack
+ deflrg.reg() != uselrg.reg() ) { // Not trivially removed
+ uint def_ideal_reg = Matcher::base2reg[n->bottom_type()->base()];
+ const RegMask &def_rm = *Matcher::idealreg2regmask[def_ideal_reg];
+ const RegMask &use_rm = n->in_RegMask(copyidx);
+ if( def_rm.overlap(use_rm) && n->is_SpillCopy() ) { // Bug 4707800, 'n' may be a storeSSL
+ if (C->check_node_count(NodeLimitFudgeFactor, out_of_nodes)) { // Check when generating nodes
+ return 0;
+ }
+ Node *spill = new (C) MachSpillCopyNode(use,use_rm,def_rm);
+ n->set_req(copyidx,spill);
+ n->as_MachSpillCopy()->set_in_RegMask(def_rm);
+ // Put the spill just before the copy
+ insert_proj( b, insidx++, spill, maxlrg++ );
+ }
+ }
+ }
+ }
+ } // End For All Instructions in Block - Non-PHI Pass
+
+ // Check if each LRG is live out of this block so as not to propagate
+ // beyond the last use of a LRG.
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ uint defidx = lidxs.at(slidx);
+ IndexSet *liveout = _live->live(b);
+ if( !liveout->member(defidx) ) {
+#ifdef ASSERT
+ // The index defidx is not live. Check the liveout array to ensure that
+ // it contains no members which compress to defidx. Finding such an
+ // instance may be a case to add liveout adjustment in compress_uf_map().
+ // See 5063219.
+ uint member;
+ IndexSetIterator isi(liveout);
+ while ((member = isi.next()) != 0) {
+ assert(defidx != Find_const(member), "Live out member has not been compressed");
+ }
+#endif
+ Reachblock[slidx] = NULL;
+ } else {
+ assert(Reachblock[slidx] != NULL,"No reaching definition for liveout value");
+ }
+ }
+#ifndef PRODUCT
+ if( trace_spilling() )
+ b->dump();
+#endif
+ } // End For All Blocks
+
+ //----------PASS 2----------
+ // Reset all DEF live range numbers here
+ for( insidx = 0; insidx < defs->size(); insidx++ ) {
+ // Grab the def
+ n1 = defs->at(insidx);
+ // Set new lidx for DEF
+ new_lrg(n1, maxlrg++);
+ }
+ //----------Phi Node Splitting----------
+ // Clean up a phi here, and assign a new live range number
+ // Cycle through this block's predecessors, collecting Reaches
+ // info for each spilled LRG and update edges.
+ // Walk the phis list to patch inputs, split phis, and name phis
+ for( insidx = 0; insidx < phis->size(); insidx++ ) {
+ Node *phi = phis->at(insidx);
+ assert(phi->is_Phi(),"This list must only contain Phi Nodes");
+ Block *b = _cfg._bbs[phi->_idx];
+ // Grab the live range number
+ uint lidx = Find_id(phi);
+ uint slidx = lrg2reach[lidx];
+ // Update node to lidx map
+ new_lrg(phi, maxlrg++);
+ // Get PASS1's up/down decision for the block.
+ int phi_up = !!UP_entry[slidx]->test(b->_pre_order);
+
+ // Force down if double-spilling live range
+ if( lrgs(lidx)._was_spilled1 )
+ phi_up = false;
+
+ // When splitting a Phi we can split it normally or "inverted".
+ // An inverted split makes the splits target the Phi's UP/DOWN
+ // sense inverted; then the Phi is followed by a final def-side
+ // split to invert back. It changes which blocks the spill code
+ // goes in.
+
+ // Walk the predecessor blocks and assign the reaching def to the Phi.
+ // Split Phi nodes by placing USE side splits wherever the reaching
+ // DEF has the wrong UP/DOWN value.
+ for( uint i = 1; i < b->num_preds(); i++ ) {
+ // Get predecessor block pre-order number
+ Block *pred = _cfg._bbs[b->pred(i)->_idx];
+ pidx = pred->_pre_order;
+ // Grab reaching def
+ Node *def = Reaches[pidx][slidx];
+ assert( def, "must have reaching def" );
+ // If input up/down sense and reg-pressure DISagree
+ if( def->rematerialize() ) {
+ def = split_Rematerialize( def, pred, pred->end_idx(), maxlrg, splits, slidx, lrg2reach, Reachblock, false );
+ if( !def ) return 0; // Bail out
+ }
+ // Update the Phi's input edge array
+ phi->set_req(i,def);
+ // Grab the UP/DOWN sense for the input
+ u1 = UP[pidx][slidx];
+ if( u1 != (phi_up != 0)) {
+ maxlrg = split_USE(def, b, phi, i, maxlrg, !u1, false, splits,slidx);
+ // If it wasn't split bail
+ if (!maxlrg) {
+ return 0;
+ }
+ }
+ } // End for all inputs to the Phi
+ } // End for all Phi Nodes
+ // Update _maxlrg to save Union asserts
+ _maxlrg = maxlrg;
+
+
+ //----------PASS 3----------
+ // Pass over all Phi's to union the live ranges
+ for( insidx = 0; insidx < phis->size(); insidx++ ) {
+ Node *phi = phis->at(insidx);
+ assert(phi->is_Phi(),"This list must only contain Phi Nodes");
+ // Walk all inputs to Phi and Union input live range with Phi live range
+ for( uint i = 1; i < phi->req(); i++ ) {
+ // Grab the input node
+ Node *n = phi->in(i);
+ assert( n, "" );
+ uint lidx = Find(n);
+ uint pidx = Find(phi);
+ if( lidx < pidx )
+ Union(n, phi);
+ else if( lidx > pidx )
+ Union(phi, n);
+ } // End for all inputs to the Phi Node
+ } // End for all Phi Nodes
+ // Now union all two address instructions
+ for( insidx = 0; insidx < defs->size(); insidx++ ) {
+ // Grab the def
+ n1 = defs->at(insidx);
+ // Set new lidx for DEF & handle 2-addr instructions
+ if( n1->is_Mach() && ((twoidx = n1->as_Mach()->two_adr()) != 0) ) {
+ assert( Find(n1->in(twoidx)) < maxlrg,"Assigning bad live range index");
+ // Union the input and output live ranges
+ uint lr1 = Find(n1);
+ uint lr2 = Find(n1->in(twoidx));
+ if( lr1 < lr2 )
+ Union(n1, n1->in(twoidx));
+ else if( lr1 > lr2 )
+ Union(n1->in(twoidx), n1);
+ } // End if two address
+ } // End for all defs
+ // DEBUG
+#ifdef ASSERT
+ // Validate all live range index assignments
+ for( bidx = 0; bidx < _cfg._num_blocks; bidx++ ) {
+ b = _cfg._blocks[bidx];
+ for( insidx = 0; insidx <= b->end_idx(); insidx++ ) {
+ Node *n = b->_nodes[insidx];
+ uint defidx = Find(n);
+ assert(defidx < _maxlrg,"Bad live range index in Split");
+ assert(defidx < maxlrg,"Bad live range index in Split");
+ }
+ }
+ // Issue a warning if splitting made no progress
+ int noprogress = 0;
+ for( slidx = 0; slidx < spill_cnt; slidx++ ) {
+ if( PrintOpto && WizardMode && splits.at(slidx) == 0 ) {
+ tty->print_cr("Failed to split live range %d", lidxs.at(slidx));
+ //BREAKPOINT;
+ }
+ else {
+ noprogress++;
+ }
+ }
+ if(!noprogress) {
+ tty->print_cr("Failed to make progress in Split");
+ //BREAKPOINT;
+ }
+#endif
+ // Return updated count of live ranges
+ return maxlrg;
+}
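
For reference, the USE-splitting table embedded in Split() condenses to a small pure function: when DEF and USE agree on UP/DOWN, register-mask overlap decides between reusing the edge and inserting a copy; when they disagree, the split goes toward the USE's side. A standalone restatement (names invented; the Split-UP/Split-DOWN outcomes remain subject to the HRP and debug-info handling in the real code):

    #include <cassert>

    enum UseSplitAction { ResetEdge, CopyMemMem, CopyRegReg, SplitUp, SplitDown };

    // Restates the 8-row decision table from PhaseChaitin::Split().
    static UseSplitAction use_split_action(bool overlap, bool def_up, bool use_up) {
      if (def_up == use_up)
        return overlap ? ResetEdge : (def_up ? CopyRegReg : CopyMemMem);
      return def_up ? SplitDown : SplitUp;
    }

    int main() {
      assert(use_split_action(false, false, false) == CopyMemMem);
      assert(use_split_action(false, true,  true ) == CopyRegReg);
      assert(use_split_action(true,  false, false) == ResetEdge);
      assert(use_split_action(true,  true,  true ) == ResetEdge);
      assert(use_split_action(true,  false, true ) == SplitUp);
      assert(use_split_action(false, true,  false) == SplitDown);
      return 0;
    }
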
diff --git a/src/share/vm/opto/regalloc.cpp b/src/share/vm/opto/regalloc.cpp
new file mode 100644
index 000000000..7c4d02797
--- /dev/null
+++ b/src/share/vm/opto/regalloc.cpp
@@ -0,0 +1,127 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_regalloc.cpp.incl"
+
+static const int NodeRegsOverflowSize = 200;
+
+void (*PhaseRegAlloc::_alloc_statistics[MAX_REG_ALLOCATORS])();
+int PhaseRegAlloc::_num_allocators = 0;
+#ifndef PRODUCT
+int PhaseRegAlloc::_total_framesize = 0;
+int PhaseRegAlloc::_max_framesize = 0;
+#endif
+
+PhaseRegAlloc::PhaseRegAlloc( uint unique, PhaseCFG &cfg,
+ Matcher &matcher,
+ void (*pr_stats)() ):
+ Phase(Register_Allocation), _cfg(cfg), _matcher(matcher),
+ _node_oops(Thread::current()->resource_area()),
+ _node_regs(0),
+ _framesize(0xdeadbeef)
+{
+ int i;
+
+ for (i=0; i < _num_allocators; i++) {
+ if (_alloc_statistics[i] == pr_stats)
+ return;
+ }
+ assert((_num_allocators + 1) < MAX_REG_ALLOCATORS, "too many register allocators");
+ _alloc_statistics[_num_allocators++] = pr_stats;
+}
+
+
+//------------------------------reg2offset-------------------------------------
+int PhaseRegAlloc::reg2offset_unchecked( OptoReg::Name reg ) const {
+ // Slots below _max_in_arg_stack_reg are offset by the entire frame.
+ // Slots above _max_in_arg_stack_reg are frame_slots and are not offset.
+ int slot = (reg < _matcher._new_SP)
+ ? reg - OptoReg::stack0() + _framesize
+ : reg - _matcher._new_SP;
+ // Note: We use the direct formula (reg - SharedInfo::stack0) instead of
+ // OptoReg::reg2stack(reg), in order to avoid asserts in the latter
+ // function. This routine must remain unchecked, so that dump_frame()
+ // can do its work undisturbed.
+ // %%% not really clear why reg2stack would assert here
+
+ return slot*VMRegImpl::stack_slot_size;
+}
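
The slot arithmetic above splits the abstract stack-register space at _new_SP: names below it (incoming arguments) are biased by the whole frame, names at or above it are ordinary frame slots. A standalone sketch of the same formula with made-up numbers (stack0 = 100, _new_SP = 110, an 8-slot frame, 4-byte slots):

    #include <cassert>

    // Made-up layout constants, chosen only to exercise the formula.
    static const int kStack0        = 100; // first abstract stack register
    static const int kNewSP         = 110; // register name of the new SP
    static const int kFramesize     = 8;   // frame depth in slots
    static const int kSlotSizeBytes = 4;

    // Mirrors PhaseRegAlloc::reg2offset_unchecked(): registers below the new
    // SP are incoming-argument slots offset by the entire frame, the rest
    // are plain frame slots.
    static int reg2offset_sketch(int reg) {
      int slot = (reg < kNewSP) ? reg - kStack0 + kFramesize
                                : reg - kNewSP;
      return slot * kSlotSizeBytes;
    }

    int main() {
      assert(reg2offset_sketch(kNewSP)     == 0);              // first frame slot
      assert(reg2offset_sketch(kNewSP + 3) == 3 * 4);          // fourth frame slot
      assert(reg2offset_sketch(kStack0)    == kFramesize * 4); // first incoming arg, past the frame
      return 0;
    }
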
+
+int PhaseRegAlloc::reg2offset( OptoReg::Name reg ) const {
+
+ // Not allowed in the out-preserve area.
+ // In-preserve area is allowed so Intel can fetch the return pc out.
+ assert( reg < _matcher._old_SP ||
+ (reg >= OptoReg::add(_matcher._old_SP,C->out_preserve_stack_slots()) &&
+ reg < _matcher._in_arg_limit) ||
+ reg >= OptoReg::add(_matcher._new_SP,C->out_preserve_stack_slots()),
+ "register allocated in a preserve area" );
+ return reg2offset_unchecked( reg );
+}
+
+//------------------------------offset2reg-------------------------------------
+OptoReg::Name PhaseRegAlloc::offset2reg(int stk_offset) const {
+ int slot = stk_offset / jintSize;
+ int reg = (slot < (int) _framesize)
+ ? slot + _matcher._new_SP
+ : OptoReg::stack2reg(slot) - _framesize;
+ assert(stk_offset == reg2offset((OptoReg::Name) reg),
+ "offset2reg does not invert properly");
+ return (OptoReg::Name) reg;
+}
+
+//------------------------------set_oop----------------------------------------
+void PhaseRegAlloc::set_oop( const Node *n, bool is_an_oop ) {
+ if( is_an_oop ) {
+ _node_oops.set(n->_idx);
+ }
+}
+
+//------------------------------is_oop-----------------------------------------
+bool PhaseRegAlloc::is_oop( const Node *n ) const {
+ return _node_oops.test(n->_idx) != 0;
+}
+
+// Allocate _node_regs table with at least "size" elements
+void PhaseRegAlloc::alloc_node_regs(int size) {
+ _node_regs_max_index = size + (size >> 1) + NodeRegsOverflowSize;
+ _node_regs = NEW_RESOURCE_ARRAY( OptoRegPair, _node_regs_max_index );
+ // We assume our caller will fill in all elements up to size-1, so
+ // only the extra space we allocate is initialized here.
+ for( uint i = size; i < _node_regs_max_index; ++i )
+ _node_regs[i].set_bad();
+}
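
alloc_node_regs() sizes the table at roughly 1.5x the requested length plus a fixed overflow pad, and pre-marks only the pad as bad, trusting the caller to fill the first "size" entries. The same policy in a standalone sketch (struct and names invented):

    #include <cassert>
    #include <cstddef>

    struct PairSketch { int first, second; void set_bad() { first = second = -1; } };

    static const size_t kOverflowPad = 200; // stands in for NodeRegsOverflowSize

    // Mirrors the sizing in PhaseRegAlloc::alloc_node_regs(): room for "size"
    // caller-filled entries, ~50% slack, and a pad whose entries start out bad.
    static PairSketch* alloc_node_regs_sketch(size_t size, size_t* max_index_out) {
      size_t max_index = size + (size >> 1) + kOverflowPad;
      PairSketch* regs = new PairSketch[max_index];
      for (size_t i = size; i < max_index; ++i) // only the slack is pre-initialized
        regs[i].set_bad();
      *max_index_out = max_index;
      return regs;
    }

    int main() {
      size_t max_index = 0;
      PairSketch* regs = alloc_node_regs_sketch(100, &max_index);
      assert(max_index == 100 + 50 + kOverflowPad);
      assert(regs[100].first == -1); // first pad slot is already marked bad
      delete[] regs;
      return 0;
    }
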
+
+#ifndef PRODUCT
+void
+PhaseRegAlloc::print_statistics() {
+ tty->print_cr("Total frameslots = %d, Max frameslots = %d", _total_framesize, _max_framesize);
+ int i;
+
+ for (i=0; i < _num_allocators; i++) {
+ _alloc_statistics[i]();
+ }
+}
+#endif
diff --git a/src/share/vm/opto/regalloc.hpp b/src/share/vm/opto/regalloc.hpp
new file mode 100644
index 000000000..37f7ba518
--- /dev/null
+++ b/src/share/vm/opto/regalloc.hpp
@@ -0,0 +1,133 @@
+/*
+ * Copyright 2000-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+class Node;
+class Matcher;
+class PhaseCFG;
+
+#define MAX_REG_ALLOCATORS 10
+
+//------------------------------PhaseRegAlloc------------------------------------
+// Abstract register allocator
+class PhaseRegAlloc : public Phase {
+ static void (*_alloc_statistics[MAX_REG_ALLOCATORS])();
+ static int _num_allocators;
+
+protected:
+ OptoRegPair *_node_regs;
+ uint _node_regs_max_index;
+ VectorSet _node_oops; // Mapping from node indices to oopiness
+
+ void alloc_node_regs(int size); // allocate _node_regs table with at least "size" elements
+
+ PhaseRegAlloc( uint unique, PhaseCFG &cfg, Matcher &matcher,
+ void (*pr_stats)());
+public:
+ PhaseCFG &_cfg; // Control flow graph
+ uint _framesize; // Size of frame in stack-slots, not counting the preserve area
+ OptoReg::Name _max_reg; // Past largest register seen
+ Matcher &_matcher; // Convert Ideal to MachNodes
+ uint node_regs_max_index() const { return _node_regs_max_index; }
+
+ // Get the register associated with the Node
+ OptoReg::Name get_reg_first( const Node *n ) const {
+ debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ return _node_regs[n->_idx].first();
+ }
+ OptoReg::Name get_reg_second( const Node *n ) const {
+ debug_only( if( n->_idx >= _node_regs_max_index ) n->dump(); );
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ return _node_regs[n->_idx].second();
+ }
+
+ // Do all the real work of allocate
+ virtual void Register_Allocate() = 0;
+
+
+ // notify the register allocator that "node" is a new reference
+ // to the value produced by "old_node"
+ virtual void add_reference( const Node *node, const Node *old_node) = 0;
+
+
+ // Set the register associated with a new Node
+ void set_bad( uint idx ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_bad();
+ }
+ void set1( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set1(reg);
+ }
+ void set2( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set2(reg);
+ }
+ void set_pair( uint idx, OptoReg::Name hi, OptoReg::Name lo ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_pair(hi, lo);
+ }
+ void set_ptr( uint idx, OptoReg::Name reg ) {
+ assert( idx < _node_regs_max_index, "Exceeded _node_regs array");
+ _node_regs[idx].set_ptr(reg);
+ }
+ // Set and query if a node produces an oop
+ void set_oop( const Node *n, bool );
+ bool is_oop( const Node *n ) const;
+
+ // Convert a register number to a stack offset
+ int reg2offset ( OptoReg::Name reg ) const;
+ int reg2offset_unchecked( OptoReg::Name reg ) const;
+
+ // Convert a stack offset to a register number
+ OptoReg::Name offset2reg( int stk_offset ) const;
+
+ // Get the register encoding associated with the Node
+ int get_encode( const Node *n ) const {
+ assert( n->_idx < _node_regs_max_index, "Exceeded _node_regs array");
+ OptoReg::Name first = _node_regs[n->_idx].first();
+ OptoReg::Name second = _node_regs[n->_idx].second();
+ assert( !OptoReg::is_valid(second) || second == first+1, "" );
+ assert(OptoReg::is_reg(first), "out of range");
+ return Matcher::_regEncode[first];
+ }
+
+ // Platform dependent hook for actions prior to allocation
+ void pd_preallocate_hook();
+
+#ifdef ASSERT
+ // Platform dependent hook for verification after allocation. Will
+ // only get called when compiling with asserts.
+ void pd_postallocate_verify_hook();
+#endif
+
+#ifndef PRODUCT
+ static int _total_framesize;
+ static int _max_framesize;
+
+ virtual void dump_frame() const = 0;
+ virtual char *dump_register( const Node *n, char *buf ) const = 0;
+ static void print_statistics();
+#endif
+};
diff --git a/src/share/vm/opto/regmask.cpp b/src/share/vm/opto/regmask.cpp
new file mode 100644
index 000000000..782d1fa99
--- /dev/null
+++ b/src/share/vm/opto/regmask.cpp
@@ -0,0 +1,288 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_regmask.cpp.incl"
+
+#define RM_SIZE _RM_SIZE /* a constant private to the class RegMask */
+
+//-------------Non-zero bit search methods used by RegMask---------------------
+// Find lowest 1, or return 32 if empty
+int find_lowest_bit( uint32 mask ) {
+ int n = 0;
+ if( (mask & 0xffff) == 0 ) {
+ mask >>= 16;
+ n += 16;
+ }
+ if( (mask & 0xff) == 0 ) {
+ mask >>= 8;
+ n += 8;
+ }
+ if( (mask & 0xf) == 0 ) {
+ mask >>= 4;
+ n += 4;
+ }
+ if( (mask & 0x3) == 0 ) {
+ mask >>= 2;
+ n += 2;
+ }
+ if( (mask & 0x1) == 0 ) {
+ mask >>= 1;
+ n += 1;
+ }
+ if( mask == 0 ) {
+ n = 32;
+ }
+ return n;
+}
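
The contract of find_lowest_bit() is "index of the lowest set bit, or 32 for an empty mask"; the unrolled halving steps above just avoid a loop and any platform intrinsic. A standalone check of the same contract using a plain loop:

    #include <cassert>
    #include <cstdint>

    // Same contract as find_lowest_bit(), written as a simple scan.
    static int lowest_bit_sketch(uint32_t mask) {
      if (mask == 0) return 32;
      int n = 0;
      while ((mask & 1u) == 0) { mask >>= 1; ++n; }
      return n;
    }

    int main() {
      assert(lowest_bit_sketch(0x00000001u) == 0);
      assert(lowest_bit_sketch(0x00080000u) == 19);
      assert(lowest_bit_sketch(0u)          == 32);
      return 0;
    }
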
+
+// Find highest 1, or return 32 if empty
+int find_hihghest_bit( uint32 mask ) {
+ int n = 0;
+ if( mask > 0xffff ) {
+ mask >>= 16;
+ n += 16;
+ }
+ if( mask > 0xff ) {
+ mask >>= 8;
+ n += 8;
+ }
+ if( mask > 0xf ) {
+ mask >>= 4;
+ n += 4;
+ }
+ if( mask > 0x3 ) {
+ mask >>= 2;
+ n += 2;
+ }
+ if( mask > 0x1 ) {
+ mask >>= 1;
+ n += 1;
+ }
+ if( mask == 0 ) {
+ n = 32;
+ }
+ return n;
+}
+
+//------------------------------dump-------------------------------------------
+
+#ifndef PRODUCT
+void OptoReg::dump( int r ) {
+ switch( r ) {
+ case Special: tty->print("r---"); break;
+ case Bad: tty->print("rBAD"); break;
+ default:
+ if( r < _last_Mach_Reg ) tty->print(Matcher::regName[r]);
+ else tty->print("rS%d",r);
+ break;
+ }
+}
+#endif
+
+
+//=============================================================================
+const RegMask RegMask::Empty(
+# define BODY(I) 0,
+ FORALL_BODY
+# undef BODY
+ 0
+);
+
+//------------------------------find_first_pair--------------------------------
+// Find the lowest-numbered register pair in the mask. Return the
+// HIGHEST register number in the pair, or BAD if no pairs.
+OptoReg::Name RegMask::find_first_pair() const {
+ VerifyPairs();
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ int bit = _A[i] & -_A[i]; // Extract low bit
+ // Convert to bit number, return hi bit in pair
+ return OptoReg::Name((i<<_LogWordBits)+find_lowest_bit(bit)+1);
+ }
+ }
+ return OptoReg::Bad;
+}
+
+//------------------------------ClearToPairs-----------------------------------
+// Clear out partial bits; leave only bit pairs
+void RegMask::ClearToPairs() {
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ bits &= ((bits & 0x55555555)<<1); // 1 hi-bit set for each pair
+ bits |= (bits>>1); // Smear 1 hi-bit into a pair
+ _A[i] = bits;
+ }
+ VerifyPairs();
+}
+
+//------------------------------SmearToPairs-----------------------------------
+// Smear out partial bits; leave only bit pairs
+void RegMask::SmearToPairs() {
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ bits |= ((bits & 0x55555555)<<1); // Smear lo bit hi per pair
+ bits |= ((bits & 0xAAAAAAAA)>>1); // Smear hi bit lo per pair
+ _A[i] = bits;
+ }
+ VerifyPairs();
+}
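
ClearToPairs() and SmearToPairs() are per-word bit tricks over the even/odd pair structure: the first keeps only complete aligned pairs, the second widens any lone bit into its full pair. A single-word standalone check (uint32_t standing in for one mask word):

    #include <cassert>
    #include <cstdint>

    // Single-word versions of the loop bodies above.
    static uint32_t clear_to_pairs(uint32_t bits) {
      bits &= (bits & 0x55555555u) << 1; // one hi-bit survives per complete pair
      bits |= bits >> 1;                 // smear it back over the pair
      return bits;
    }

    static uint32_t smear_to_pairs(uint32_t bits) {
      bits |= (bits & 0x55555555u) << 1; // a lone even bit gains its odd partner
      bits |= (bits & 0xAAAAAAAAu) >> 1; // a lone odd bit gains its even partner
      return bits;
    }

    int main() {
      // 0b1101: pair {2,3} is complete, bit 0 is a lone half-pair.
      assert(clear_to_pairs(0xDu) == 0xCu);
      // A lone bit in either half of a pair smears out to the full pair.
      assert(smear_to_pairs(0x4u) == 0xCu); // bit 2 -> bits {2,3}
      assert(smear_to_pairs(0x2u) == 0x3u); // bit 1 -> bits {0,1}
      return 0;
    }
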
+
+//------------------------------is_aligned_pairs-------------------------------
+bool RegMask::is_aligned_Pairs() const {
+ // Assert that the register mask contains only bit pairs.
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ int bits = _A[i];
+ while( bits ) { // Check bits for pairing
+ int bit = bits & -bits; // Extract low bit
+ // A low bit at an odd position means the pair is mis-aligned.
+ if( (bit & 0x55555555) == 0 ) return false;
+ bits -= bit; // Remove bit from mask
+ // Check for aligned adjacent bit
+ if( (bits & (bit<<1)) == 0 ) return false;
+ bits -= (bit<<1); // Remove the other half of the pair
+ }
+ }
+ return true;
+}
+
+//------------------------------is_bound1--------------------------------------
+// Return TRUE if the mask contains a single bit
+int RegMask::is_bound1() const {
+ if( is_AllStack() ) return false;
+ int bit = -1; // Set to hold the one bit allowed
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ if( bit != -1 ) return false; // Already had bits, so fail
+ bit = _A[i] & -_A[i]; // Extract 1 bit from mask
+ if( bit != _A[i] ) return false; // Found many bits, so fail
+ }
+ }
+ // True for both the empty mask and for a single bit
+ return true;
+}
+
+//------------------------------is_bound2--------------------------------------
+// Return TRUE if the mask contains an adjacent pair of bits and no other bits.
+int RegMask::is_bound2() const {
+ if( is_AllStack() ) return false;
+
+ int bit = -1; // Set to hold the one bit allowed
+ for( int i = 0; i < RM_SIZE; i++ ) {
+ if( _A[i] ) { // Found some bits
+ if( bit != -1 ) return false; // Already had bits, so fail
+ bit = _A[i] & -(_A[i]); // Extract 1 bit from mask
+ if( (bit << 1) != 0 ) { // Bit pair stays in same word?
+ if( (bit | (bit<<1)) != _A[i] )
+ return false; // Require adjacent bit pair and no more bits
+ } else { // Else it's a split-pair case
+ if( bit != _A[i] ) return false; // Found many bits, so fail
+ i++; // Skip iteration forward
+ if( _A[i] != 1 ) return false; // Require 1 lo bit in next word
+ }
+ }
+ }
+ // True for both the empty mask and for a bit pair
+ return true;
+}
+
+//------------------------------is_UP------------------------------------------
+// UP means register-only; register-plus-stack or stack-only is DOWN
+bool RegMask::is_UP() const {
+ // Quick common case check for DOWN (any stack slot is legal)
+ if( is_AllStack() )
+ return false;
+ // Slower check for any stack bits set (also DOWN)
+ if( overlap(Matcher::STACK_ONLY_mask) )
+ return false;
+ // Not DOWN, so must be UP
+ return true;
+}
+
+//------------------------------Size-------------------------------------------
+// Compute size of register mask in bits
+uint RegMask::Size() const {
+ extern uint8 bitsInByte[256];
+ uint sum = 0;
+ for( int i = 0; i < RM_SIZE; i++ )
+ sum +=
+ bitsInByte[(_A[i]>>24) & 0xff] +
+ bitsInByte[(_A[i]>>16) & 0xff] +
+ bitsInByte[(_A[i]>> 8) & 0xff] +
+ bitsInByte[ _A[i] & 0xff];
+ return sum;
+}
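
Size() is a table-driven population count over the mask words, one byte at a time. The same technique in miniature, with the 256-entry table built locally instead of using the VM's global bitsInByte[]:

    #include <cassert>
    #include <cstdint>

    static uint8_t bits_in_byte[256]; // bit count of every possible byte value

    static void init_table() {
      for (int v = 0; v < 256; ++v)
        for (int b = 0; b < 8; ++b)
          bits_in_byte[v] += (v >> b) & 1;
    }

    // Table-driven popcount of one 32-bit word, as in RegMask::Size().
    static unsigned popcount32(uint32_t w) {
      return bits_in_byte[(w >> 24) & 0xff] + bits_in_byte[(w >> 16) & 0xff] +
             bits_in_byte[(w >>  8) & 0xff] + bits_in_byte[ w        & 0xff];
    }

    int main() {
      init_table();
      assert(popcount32(0x00000000u) ==  0);
      assert(popcount32(0xF000000Fu) ==  8);
      assert(popcount32(0xFFFFFFFFu) == 32);
      return 0;
    }
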
+
+#ifndef PRODUCT
+//------------------------------print------------------------------------------
+void RegMask::dump( ) const {
+ tty->print("[");
+ RegMask rm = *this; // Structure copy into local temp
+
+ OptoReg::Name start = rm.find_first_elem(); // Get a register
+ if( OptoReg::is_valid(start) ) { // Check for empty mask
+ rm.Remove(start); // Yank from mask
+ OptoReg::dump(start); // Print register
+ OptoReg::Name last = start;
+
+ // Now I have printed an initial register.
+ // Print adjacent registers as "rX-rZ" instead of "rX,rY,rZ".
+ // Begin looping over the remaining registers.
+ while( 1 ) { //
+ OptoReg::Name reg = rm.find_first_elem(); // Get a register
+ if( !OptoReg::is_valid(reg) )
+ break; // Empty mask, end loop
+ rm.Remove(reg); // Yank from mask
+
+ if( last+1 == reg ) { // See if they are adjacent
+ // Adjacent registers just collect into long runs, no printing.
+ last = reg;
+ } else { // Ending some kind of run
+ if( start == last ) { // 1-register run; no special printing
+ } else if( start+1 == last ) {
+ tty->print(","); // 2-register run; print as "rX,rY"
+ OptoReg::dump(last);
+ } else { // Multi-register run; print as "rX-rZ"
+ tty->print("-");
+ OptoReg::dump(last);
+ }
+ tty->print(","); // Seperate start of new run
+ start = last = reg; // Start a new register run
+ OptoReg::dump(start); // Print register
+ } // End of if ending a register run or not
+ } // End of while regmask not empty
+
+ if( start == last ) { // 1-register run; no special printing
+ } else if( start+1 == last ) {
+ tty->print(","); // 2-register run; print as "rX,rY"
+ OptoReg::dump(last);
+ } else { // Multi-register run; print as "rX-rZ"
+ tty->print("-");
+ OptoReg::dump(last);
+ }
+ if( rm.is_AllStack() ) tty->print("...");
+ }
+ tty->print("]");
+}
+#endif
diff --git a/src/share/vm/opto/regmask.hpp b/src/share/vm/opto/regmask.hpp
new file mode 100644
index 000000000..e34c8354f
--- /dev/null
+++ b/src/share/vm/opto/regmask.hpp
@@ -0,0 +1,264 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Some fun naming (textual) substitutions:
+//
+// RegMask::get_low_elem() ==> RegMask::find_first_elem()
+// RegMask::Special ==> RegMask::Empty
+// RegMask::_flags ==> RegMask::is_AllStack()
+// RegMask::operator<<=() ==> RegMask::Insert()
+// RegMask::operator>>=() ==> RegMask::Remove()
+// RegMask::Union() ==> RegMask::OR
+// RegMask::Inter() ==> RegMask::AND
+//
+// OptoRegister::RegName ==> OptoReg::Name
+//
+// OptoReg::stack0() ==> _last_Mach_Reg or ZERO in core version
+//
+// numregs in chaitin ==> proper degree in chaitin
+
+//-------------Non-zero bit search methods used by RegMask---------------------
+// Find lowest 1, or return 32 if empty
+int find_lowest_bit( uint32 mask );
+// Find highest 1, or return 32 if empty
+int find_hihghest_bit( uint32 mask );
+
+//------------------------------RegMask----------------------------------------
+// The ADL file describes how to print the machine-specific registers, as well
+// as any notion of register classes. We provide a register mask, which is
+// just a collection of Register numbers.
+
+// The ADLC defines 2 macros, RM_SIZE and FORALL_BODY.
+// RM_SIZE is the size of a register mask in words.
+// FORALL_BODY replicates a BODY macro once per word in the register mask.
+// The usage is somewhat clumsy and limited to the regmask.[h,c]pp files.
+// However, it means the ADLC can redefine the unroll macro and all loops
+// over register masks will be unrolled by the correct amount.
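+// For example, if the ADLC emitted RM_SIZE == 4, FORALL_BODY would expand to
+// BODY(0) BODY(1) BODY(2) BODY(3), so a body written once per word (as in
+// Clear() and OR() below) is unrolled four times with constant indices.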
+
+class RegMask VALUE_OBJ_CLASS_SPEC {
+ union {
+ double _dummy_force_double_alignment[RM_SIZE>>1];
+ // Array of Register Mask bits. This array is large enough to cover
+ // all the machine registers and all parameters that need to be passed
+ // on the stack (stack registers) up to some interesting limit. Methods
+ // that need more parameters will NOT be compiled. On Intel, the limit
+ // is something like 90+ parameters.
+ int _A[RM_SIZE];
+ };
+
+ enum {
+ _WordBits = BitsPerInt,
+ _LogWordBits = LogBitsPerInt,
+ _RM_SIZE = RM_SIZE // local constant, imported, then hidden by #undef
+ };
+
+public:
+ enum { CHUNK_SIZE = RM_SIZE*_WordBits };
+
+ // SlotsPerLong is 2, since slots are 32 bits and longs are 64 bits.
+ // Also, consider the maximum alignment size for a normally allocated
+ // value. Since we allocate register pairs but not register quads (at
+ // present), this alignment is SlotsPerLong (== 2). A normally
+ // aligned allocated register is either a single register, or a pair
+ // of adjacent registers, the lower-numbered being even.
+ // See also is_aligned_Pairs() below, and the padding added before
+ // Matcher::_new_SP to keep allocated pairs aligned properly.
+ // If we ever go to quad-word allocations, SlotsPerQuad will become
+ // the controlling alignment constraint. Note that this alignment
+ // requirement is internal to the allocator, and independent of any
+ // particular platform.
+ enum { SlotsPerLong = 2 };
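+  // For example, registers 6 and 7 form an aligned pair suitable for a long
+  // or double, while the misaligned pair 7 and 8 would fail is_aligned_Pairs().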
+
+ // A constructor only used by the ADLC output. All mask fields are filled
+ // in directly. Calls to this look something like RM(1,2,3,4);
+ RegMask(
+# define BODY(I) int a##I,
+ FORALL_BODY
+# undef BODY
+ int dummy = 0 ) {
+# define BODY(I) _A[I] = a##I;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Handy copying constructor
+ RegMask( RegMask *rm ) {
+# define BODY(I) _A[I] = rm->_A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Construct an empty mask
+ RegMask( ) { Clear(); }
+
+ // Construct a mask with a single bit
+ RegMask( OptoReg::Name reg ) { Clear(); Insert(reg); }
+
+ // Check for register being in mask
+ int Member( OptoReg::Name reg ) const {
+ assert( reg < CHUNK_SIZE, "" );
+ return _A[reg>>_LogWordBits] & (1<<(reg&(_WordBits-1)));
+ }
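+  // Example: with 32-bit words, reg 37 maps to bit 5 of _A[1]; Insert() and
+  // Remove() below set and clear that same bit.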
+
+ // The last bit in the register mask indicates that the mask should repeat
+ // indefinitely with ONE bits. Returns TRUE if mask is infinite or
+ // unbounded in size. Returns FALSE if mask is finite size.
+ int is_AllStack() const { return _A[RM_SIZE-1] >> (_WordBits-1); }
+
+  // Work around an -xO3 optimization problem in WS6U1. The old way:
+ // void set_AllStack() { _A[RM_SIZE-1] |= (1<<(_WordBits-1)); }
+ // will cause _A[RM_SIZE-1] to be clobbered, not updated when set_AllStack()
+ // follows an Insert() loop, like the one found in init_spill_mask(). Using
+  // Insert() instead works because the index into _A is computed instead of
+  // being constant. See bug 4665841.
+ void set_AllStack() { Insert(OptoReg::Name(CHUNK_SIZE-1)); }
+
+ // Test for being a not-empty mask.
+ int is_NotEmpty( ) const {
+ int tmp = 0;
+# define BODY(I) tmp |= _A[I];
+ FORALL_BODY
+# undef BODY
+ return tmp;
+ }
+
+ // Find lowest-numbered register from mask, or BAD if mask is empty.
+ OptoReg::Name find_first_elem() const {
+ int base, bits;
+# define BODY(I) if( (bits = _A[I]) != 0 ) base = I<<_LogWordBits; else
+ FORALL_BODY
+# undef BODY
+ { base = OptoReg::Bad; bits = 1<<0; }
+ return OptoReg::Name(base + find_lowest_bit(bits));
+ }
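+  // Example: if only bit 33 is set, _A[0] == 0 and _A[1] == 2, so base becomes
+  // 1<<_LogWordBits == 32 and find_lowest_bit(2) == 1, yielding reg 33.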
+ // Get highest-numbered register from mask, or BAD if mask is empty.
+ OptoReg::Name find_last_elem() const {
+ int base, bits;
+# define BODY(I) if( (bits = _A[RM_SIZE-1-I]) != 0 ) base = (RM_SIZE-1-I)<<_LogWordBits; else
+ FORALL_BODY
+# undef BODY
+ { base = OptoReg::Bad; bits = 1<<0; }
+ return OptoReg::Name(base + find_hihghest_bit(bits));
+ }
+
+ // Find the lowest-numbered register pair in the mask. Return the
+ // HIGHEST register number in the pair, or BAD if no pairs.
+ // Assert that the mask contains only bit pairs.
+ OptoReg::Name find_first_pair() const;
+
+ // Clear out partial bits; leave only aligned adjacent bit pairs.
+ void ClearToPairs();
+ // Smear out partial bits; leave only aligned adjacent bit pairs.
+ void SmearToPairs();
+ // Verify that the mask contains only aligned adjacent bit pairs
+ void VerifyPairs() const { assert( is_aligned_Pairs(), "mask is not aligned, adjacent pairs" ); }
+ // Test that the mask contains only aligned adjacent bit pairs
+ bool is_aligned_Pairs() const;
+
+ // mask is a pair of misaligned registers
+ bool is_misaligned_Pair() const { return Size()==2 && !is_aligned_Pairs();}
+ // Test for single register
+ int is_bound1() const;
+ // Test for a single adjacent pair
+ int is_bound2() const;
+
+ // Fast overlap test. Non-zero if any registers in common.
+ int overlap( const RegMask &rm ) const {
+ return
+# define BODY(I) (_A[I] & rm._A[I]) |
+ FORALL_BODY
+# undef BODY
+ 0 ;
+ }
+
+ // Special test for register pressure based splitting
+  // UP means register-only; register-plus-stack or stack-only is DOWN
+ bool is_UP() const;
+
+ // Clear a register mask
+ void Clear( ) {
+# define BODY(I) _A[I] = 0;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Fill a register mask with 1's
+ void Set_All( ) {
+# define BODY(I) _A[I] = -1;
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Insert register into mask
+ void Insert( OptoReg::Name reg ) {
+ assert( reg < CHUNK_SIZE, "" );
+ _A[reg>>_LogWordBits] |= (1<<(reg&(_WordBits-1)));
+ }
+
+ // Remove register from mask
+ void Remove( OptoReg::Name reg ) {
+ assert( reg < CHUNK_SIZE, "" );
+ _A[reg>>_LogWordBits] &= ~(1<<(reg&(_WordBits-1)));
+ }
+
+ // OR 'rm' into 'this'
+ void OR( const RegMask &rm ) {
+# define BODY(I) this->_A[I] |= rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // AND 'rm' into 'this'
+ void AND( const RegMask &rm ) {
+# define BODY(I) this->_A[I] &= rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Subtract 'rm' from 'this'
+ void SUBTRACT( const RegMask &rm ) {
+# define BODY(I) _A[I] &= ~rm._A[I];
+ FORALL_BODY
+# undef BODY
+ }
+
+ // Compute size of register mask: number of bits
+ uint Size() const;
+
+#ifndef PRODUCT
+ void print() const { dump(); }
+ void dump() const; // Print a mask
+#endif
+
+ static const RegMask Empty; // Common empty mask
+
+ static bool can_represent(OptoReg::Name reg) {
+ // NOTE: -1 in computation reflects the usage of the last
+ // bit of the regmask as an infinite stack flag.
+ return (int)reg < (int)(CHUNK_SIZE-1);
+ }
+};
+
+// Do not use this constant directly in client code!
+#undef RM_SIZE
diff --git a/src/share/vm/opto/rootnode.cpp b/src/share/vm/opto/rootnode.cpp
new file mode 100644
index 000000000..44e0118ba
--- /dev/null
+++ b/src/share/vm/opto/rootnode.cpp
@@ -0,0 +1,81 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_rootnode.cpp.incl"
+
+//------------------------------Ideal------------------------------------------
+// Remove dead inputs
+Node *RootNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ for( uint i = 1; i < req(); i++ ) { // For all inputs
+ // Check for and remove dead inputs
+ if( phase->type(in(i)) == Type::TOP ) {
+ del_req(i--); // Delete TOP inputs
+ }
+ }
+
+ // I used to do tail-splitting in the Ideal graph here, but it does not
+ // work. The tail-splitting forces values live into the Return to be
+ // ready at a point which dominates the split returns. This forces Stores
+ // to be hoisted high. The "proper" fix would be to split Stores down
+ // each path, but this makes the split unprofitable. If we want to do this
+ // optimization, it needs to be done after allocation so we can count all
+ // the instructions needing to be cloned in the cost metric.
+
+ // There used to be a spoof here for caffeine marks which completely
+  // eliminated very simple self-recursions, but it's not worth it.
+ // Deep inlining of self-calls gets nearly all of the same benefits.
+ // If we want to get the rest of the win later, we should pattern match
+ // simple recursive call trees to closed-form solutions.
+
+ return NULL; // No further opportunities exposed
+}
+
+//=============================================================================
+HaltNode::HaltNode( Node *ctrl, Node *frameptr ) : Node(TypeFunc::Parms) {
+ Node* top = Compile::current()->top();
+ init_req(TypeFunc::Control, ctrl );
+ init_req(TypeFunc::I_O, top);
+ init_req(TypeFunc::Memory, top);
+ init_req(TypeFunc::FramePtr, frameptr );
+ init_req(TypeFunc::ReturnAdr,top);
+}
+
+const Type *HaltNode::bottom_type() const { return Type::BOTTOM; }
+
+//------------------------------Ideal------------------------------------------
+Node *HaltNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ return remove_dead_region(phase, can_reshape) ? this : NULL;
+}
+
+//------------------------------Value------------------------------------------
+const Type *HaltNode::Value( PhaseTransform *phase ) const {
+ return ( phase->type(in(TypeFunc::Control)) == Type::TOP)
+ ? Type::TOP
+ : Type::BOTTOM;
+}
+
+const RegMask &HaltNode::out_RegMask() const {
+ return RegMask::Empty;
+}
diff --git a/src/share/vm/opto/rootnode.hpp b/src/share/vm/opto/rootnode.hpp
new file mode 100644
index 000000000..369bd9d6d
--- /dev/null
+++ b/src/share/vm/opto/rootnode.hpp
@@ -0,0 +1,62 @@
+/*
+ * Copyright 1997-2005 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------RootNode---------------------------------------
+// The one-and-only before-all-else and after-all-else RootNode. The RootNode
+// represents what happens if the user runs the whole program repeatedly. The
+// RootNode produces the initial values of I/O and memory for the program or
+// procedure start.
+class RootNode : public LoopNode {
+public:
+ RootNode( ) : LoopNode(0,0) {
+ init_class_id(Class_Root);
+ del_req(2);
+ del_req(1);
+ }
+ virtual int Opcode() const;
+ virtual const Node *is_block_proj() const { return this; }
+ virtual const Type *bottom_type() const { return Type::BOTTOM; }
+ virtual Node *Identity( PhaseTransform *phase ) { return this; }
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const { return Type::BOTTOM; }
+};
+
+//------------------------------HaltNode---------------------------------------
+// Throw an exception & die
+class HaltNode : public Node {
+public:
+ HaltNode( Node *ctrl, Node *frameptr );
+ virtual int Opcode() const;
+ virtual bool pinned() const { return true; };
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const;
+ virtual bool is_CFG() const { return true; }
+ virtual uint hash() const { return NO_HASH; } // CFG nodes do not hash
+ virtual bool depends_only_on_test() const { return false; }
+ virtual const Node *is_block_proj() const { return this; }
+ virtual const RegMask &out_RegMask() const;
+ virtual uint ideal_reg() const { return NotAMachineReg; }
+ virtual uint match_edge(uint idx) const { return 0; }
+};
diff --git a/src/share/vm/opto/runtime.cpp b/src/share/vm/opto/runtime.cpp
new file mode 100644
index 000000000..aedfceb51
--- /dev/null
+++ b/src/share/vm/opto/runtime.cpp
@@ -0,0 +1,1177 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_runtime.cpp.incl"
+
+
+// For debugging purposes:
+// To force FullGCALot inside a runtime function, add the following two lines
+//
+// Universe::release_fullgc_alot_dummy();
+// MarkSweep::invoke(0, "Debugging");
+//
+// At command line specify the parameters: -XX:+FullGCALot -XX:FullGCALotStart=100000000
+
+
+
+
+// Compiled code entry points
+address OptoRuntime::_new_instance_Java = NULL;
+address OptoRuntime::_new_array_Java = NULL;
+address OptoRuntime::_multianewarray2_Java = NULL;
+address OptoRuntime::_multianewarray3_Java = NULL;
+address OptoRuntime::_multianewarray4_Java = NULL;
+address OptoRuntime::_multianewarray5_Java = NULL;
+address OptoRuntime::_vtable_must_compile_Java = NULL;
+address OptoRuntime::_complete_monitor_locking_Java = NULL;
+address OptoRuntime::_rethrow_Java = NULL;
+
+address OptoRuntime::_slow_arraycopy_Java = NULL;
+address OptoRuntime::_register_finalizer_Java = NULL;
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+address OptoRuntime::_zap_dead_Java_locals_Java = NULL;
+address OptoRuntime::_zap_dead_native_locals_Java = NULL;
+# endif
+
+
+// This should be called in an assertion at the start of OptoRuntime routines
+// which are entered from compiled code (all of them)
+#ifndef PRODUCT
+static bool check_compiled_frame(JavaThread* thread) {
+ assert(thread->last_frame().is_runtime_frame(), "cannot call runtime directly from compiled code");
+#ifdef ASSERT
+ RegisterMap map(thread, false);
+ frame caller = thread->last_frame().sender(&map);
+  assert(caller.is_compiled_frame(), "not being called from compiled-like code");
+#endif /* ASSERT */
+ return true;
+}
+#endif
+
+
+#define gen(env, var, type_func_gen, c_func, fancy_jump, pass_tls, save_arg_regs, return_pc) \
+ var = generate_stub(env, type_func_gen, CAST_FROM_FN_PTR(address, c_func), #var, fancy_jump, pass_tls, save_arg_regs, return_pc)
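+// For example, the first gen() call below expands roughly to:
+//   _new_instance_Java = generate_stub(env, new_instance_Type,
+//                                      CAST_FROM_FN_PTR(address, new_instance_C),
+//                                      "_new_instance_Java", 0, true, false, false);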
+
+void OptoRuntime::generate(ciEnv* env) {
+
+ generate_exception_blob();
+
+  // Note: tls means fetching the return oop out of the thread-local storage
+ //
+ // variable/name type-function-gen , runtime method ,fncy_jp, tls,save_args,retpc
+ // -------------------------------------------------------------------------------------------------------------------------------
+ gen(env, _new_instance_Java , new_instance_Type , new_instance_C , 0 , true , false, false);
+ gen(env, _new_array_Java , new_array_Type , new_array_C , 0 , true , false, false);
+ gen(env, _multianewarray2_Java , multianewarray2_Type , multianewarray2_C , 0 , true , false, false);
+ gen(env, _multianewarray3_Java , multianewarray3_Type , multianewarray3_C , 0 , true , false, false);
+ gen(env, _multianewarray4_Java , multianewarray4_Type , multianewarray4_C , 0 , true , false, false);
+ gen(env, _multianewarray5_Java , multianewarray5_Type , multianewarray5_C , 0 , true , false, false);
+ gen(env, _complete_monitor_locking_Java , complete_monitor_enter_Type , SharedRuntime::complete_monitor_locking_C , 0 , false, false, false);
+ gen(env, _rethrow_Java , rethrow_Type , rethrow_C , 2 , true , false, true );
+
+ gen(env, _slow_arraycopy_Java , slow_arraycopy_Type , SharedRuntime::slow_arraycopy_C , 0 , false, false, false);
+ gen(env, _register_finalizer_Java , register_finalizer_Type , register_finalizer , 0 , false, false, false);
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ gen(env, _zap_dead_Java_locals_Java , zap_dead_locals_Type , zap_dead_Java_locals_C , 0 , false, true , false );
+ gen(env, _zap_dead_native_locals_Java , zap_dead_locals_Type , zap_dead_native_locals_C , 0 , false, true , false );
+# endif
+
+}
+
+#undef gen
+
+
+// Helper method to do generation of RunTimeStub's
+address OptoRuntime::generate_stub( ciEnv* env,
+ TypeFunc_generator gen, address C_function,
+ const char *name, int is_fancy_jump,
+ bool pass_tls,
+ bool save_argument_registers,
+ bool return_pc ) {
+ ResourceMark rm;
+ Compile C( env, gen, C_function, name, is_fancy_jump, pass_tls, save_argument_registers, return_pc );
+ return C.stub_entry_point();
+}
+
+const char* OptoRuntime::stub_name(address entry) {
+#ifndef PRODUCT
+ CodeBlob* cb = CodeCache::find_blob(entry);
+ RuntimeStub* rs =(RuntimeStub *)cb;
+ assert(rs != NULL && rs->is_runtime_stub(), "not a runtime stub");
+ return rs->name();
+#else
+ // Fast implementation for product mode (maybe it should be inlined too)
+ return "runtime stub";
+#endif
+}
+
+
+//=============================================================================
+// Opto compiler runtime routines
+//=============================================================================
+
+
+//=============================allocation======================================
+// We failed the fast-path allocation. Now we need to do a scavenge or GC
+// and try allocation again.
+
+void OptoRuntime::do_eager_card_mark(JavaThread* thread) {
+ // After any safepoint, just before going back to compiled code,
+ // we perform a card mark. This lets the compiled code omit
+ // card marks for initialization of new objects.
+ // Keep this code consistent with GraphKit::store_barrier.
+
+ oop new_obj = thread->vm_result();
+ if (new_obj == NULL) return;
+
+ assert(Universe::heap()->can_elide_tlab_store_barriers(),
+ "compiler must check this first");
+ new_obj = Universe::heap()->new_store_barrier(new_obj);
+ thread->set_vm_result(new_obj);
+}
+
+// object allocation
+JRT_BLOCK_ENTRY(void, OptoRuntime::new_instance_C(klassOopDesc* klass, JavaThread* thread))
+ JRT_BLOCK;
+#ifndef PRODUCT
+ SharedRuntime::_new_instance_ctr++; // new instance requires GC
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+
+ // These checks are cheap to make and support reflective allocation.
+ int lh = Klass::cast(klass)->layout_helper();
+ if (Klass::layout_helper_needs_slow_path(lh)
+ || !instanceKlass::cast(klass)->is_initialized()) {
+ KlassHandle kh(THREAD, klass);
+ kh->check_valid_for_instantiation(false, THREAD);
+ if (!HAS_PENDING_EXCEPTION) {
+ instanceKlass::cast(kh())->initialize(THREAD);
+ }
+ if (!HAS_PENDING_EXCEPTION) {
+ klass = kh();
+ } else {
+ klass = NULL;
+ }
+ }
+
+ if (klass != NULL) {
+ // Scavenge and allocate an instance.
+ oop result = instanceKlass::cast(klass)->allocate_instance(THREAD);
+ thread->set_vm_result(result);
+
+ // Pass oops back through thread local storage. Our apparent type to Java
+ // is that we return an oop, but we can block on exit from this routine and
+ // a GC can trash the oop in C's return register. The generated stub will
+ // fetch the oop from TLS after any possible GC.
+ }
+
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ JRT_BLOCK_END;
+
+ if (GraphKit::use_ReduceInitialCardMarks()) {
+ // do them now so we don't have to do them on the fast path
+ do_eager_card_mark(thread);
+ }
+JRT_END
+
+
+// array allocation
+JRT_BLOCK_ENTRY(void, OptoRuntime::new_array_C(klassOopDesc* array_type, int len, JavaThread *thread))
+ JRT_BLOCK;
+#ifndef PRODUCT
+ SharedRuntime::_new_array_ctr++; // new array requires GC
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+
+ // Scavenge and allocate an instance.
+ oop result;
+
+ if (Klass::cast(array_type)->oop_is_typeArray()) {
+ // The oopFactory likes to work with the element type.
+ // (We could bypass the oopFactory, since it doesn't add much value.)
+ BasicType elem_type = typeArrayKlass::cast(array_type)->element_type();
+ result = oopFactory::new_typeArray(elem_type, len, THREAD);
+ } else {
+ // Although the oopFactory likes to work with the elem_type,
+ // the compiler prefers the array_type, since it must already have
+ // that latter value in hand for the fast path.
+ klassOopDesc* elem_type = objArrayKlass::cast(array_type)->element_klass();
+ result = oopFactory::new_objArray(elem_type, len, THREAD);
+ }
+
+ // Pass oops back through thread local storage. Our apparent type to Java
+ // is that we return an oop, but we can block on exit from this routine and
+ // a GC can trash the oop in C's return register. The generated stub will
+ // fetch the oop from TLS after any possible GC.
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(result);
+ JRT_BLOCK_END;
+
+ if (GraphKit::use_ReduceInitialCardMarks()) {
+ // do them now so we don't have to do them on the fast path
+ do_eager_card_mark(thread);
+ }
+JRT_END
+
+// Note: multianewarray for one dimension is handled inline by GraphKit::new_array.
+
+// multianewarray for 2 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray2_C(klassOopDesc* elem_type, int len1, int len2, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi2_ctr++;                // multianewarray for 2 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[2];
+ dims[0] = len1;
+ dims[1] = len2;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(2, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 3 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray3_C(klassOopDesc* elem_type, int len1, int len2, int len3, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi3_ctr++;                // multianewarray for 3 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[3];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(3, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 4 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray4_C(klassOopDesc* elem_type, int len1, int len2, int len3, int len4, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi4_ctr++;                // multianewarray for 4 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[4];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ dims[3] = len4;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(4, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+// multianewarray for 5 dimensions
+JRT_ENTRY(void, OptoRuntime::multianewarray5_C(klassOopDesc* elem_type, int len1, int len2, int len3, int len4, int len5, JavaThread *thread))
+#ifndef PRODUCT
+  SharedRuntime::_multi5_ctr++;                // multianewarray for 5 dimensions
+#endif
+ assert(check_compiled_frame(thread), "incorrect caller");
+ assert(oop(elem_type)->is_klass(), "not a class");
+ jint dims[5];
+ dims[0] = len1;
+ dims[1] = len2;
+ dims[2] = len3;
+ dims[3] = len4;
+ dims[4] = len5;
+ oop obj = arrayKlass::cast(elem_type)->multi_allocate(5, dims, THREAD);
+ deoptimize_caller_frame(thread, HAS_PENDING_EXCEPTION);
+ thread->set_vm_result(obj);
+JRT_END
+
+const TypeFunc *OptoRuntime::new_instance_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+const TypeFunc *OptoRuntime::athrow_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Klass to be allocated
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+const TypeFunc *OptoRuntime::new_array_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
+ fields[TypeFunc::Parms+1] = TypeInt::INT; // array size
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::multianewarray_Type(int ndim) {
+ // create input type (domain)
+ const int nargs = ndim + 1;
+ const Type **fields = TypeTuple::fields(nargs);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // element klass
+ for( int i = 1; i < nargs; i++ )
+ fields[TypeFunc::Parms + i] = TypeInt::INT; // array size
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+nargs, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::NOTNULL; // Returned oop
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::multianewarray2_Type() {
+ return multianewarray_Type(2);
+}
+
+const TypeFunc *OptoRuntime::multianewarray3_Type() {
+ return multianewarray_Type(3);
+}
+
+const TypeFunc *OptoRuntime::multianewarray4_Type() {
+ return multianewarray_Type(4);
+}
+
+const TypeFunc *OptoRuntime::multianewarray5_Type() {
+ return multianewarray_Type(5);
+}
+
+const TypeFunc *OptoRuntime::uncommon_trap_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+  // trap request: an int encoding the deoptimization reason and action
+  fields[TypeFunc::Parms+0] = TypeInt::INT;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+// Type used for stub generation for zap_dead_locals.
+// No inputs or outputs
+const TypeFunc *OptoRuntime::zap_dead_locals_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(0);
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms,fields);
+
+ return TypeFunc::make(domain,range);
+}
+# endif
+
+
+//-----------------------------------------------------------------------------
+// Monitor Handling
+const TypeFunc *OptoRuntime::complete_monitor_enter_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+//-----------------------------------------------------------------------------
+const TypeFunc *OptoRuntime::complete_monitor_exit_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Object to be Locked
+ fields[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // Address of stack location for lock
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+const TypeFunc* OptoRuntime::flush_windows_Type() {
+ // create input type (domain)
+ const Type** fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::l2f_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeLong::LONG;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::modf_Type() {
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+ fields[TypeFunc::Parms+1] = Type::FLOAT;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = Type::FLOAT;
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc *OptoRuntime::Math_D_D_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+  // the double argument (its second HALF slot follows)
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+const TypeFunc* OptoRuntime::Math_DD_D_Type() {
+ const Type **fields = TypeTuple::fields(4);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ fields[TypeFunc::Parms+2] = Type::DOUBLE;
+ fields[TypeFunc::Parms+3] = Type::HALF;
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+4, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = Type::DOUBLE;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+//-------------- currentTimeMillis
+
+const TypeFunc* OptoRuntime::current_time_millis_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(0);
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+0, fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeLong::LONG;
+ fields[TypeFunc::Parms+1] = Type::HALF;
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+// arraycopy stub variations:
+enum ArrayCopyType {
+ ac_fast, // void(ptr, ptr, size_t)
+ ac_checkcast, // int(ptr, ptr, size_t, size_t, ptr)
+ ac_slow, // void(ptr, int, ptr, int, int)
+ ac_generic // int(ptr, int, ptr, int, int)
+};
+
+static const TypeFunc* make_arraycopy_Type(ArrayCopyType act) {
+ // create input type (domain)
+ int num_args = (act == ac_fast ? 3 : 5);
+ int num_size_args = (act == ac_fast ? 1 : act == ac_checkcast ? 2 : 0);
+ int argcnt = num_args;
+ LP64_ONLY(argcnt += num_size_args); // halfwords for lengths
+ const Type** fields = TypeTuple::fields(argcnt);
+ int argp = TypeFunc::Parms;
+ fields[argp++] = TypePtr::NOTNULL; // src
+ if (num_size_args == 0) {
+ fields[argp++] = TypeInt::INT; // src_pos
+ }
+ fields[argp++] = TypePtr::NOTNULL; // dest
+ if (num_size_args == 0) {
+ fields[argp++] = TypeInt::INT; // dest_pos
+ fields[argp++] = TypeInt::INT; // length
+ }
+ while (num_size_args-- > 0) {
+ fields[argp++] = TypeX_X; // size in whatevers (size_t)
+ LP64_ONLY(fields[argp++] = Type::HALF); // other half of long length
+ }
+ if (act == ac_checkcast) {
+ fields[argp++] = TypePtr::NOTNULL; // super_klass
+ }
+ assert(argp == TypeFunc::Parms+argcnt, "correct decoding of act");
+ const TypeTuple* domain = TypeTuple::make(TypeFunc::Parms+argcnt, fields);
+
+ // create result type if needed
+ int retcnt = (act == ac_checkcast || act == ac_generic ? 1 : 0);
+ fields = TypeTuple::fields(1);
+ if (retcnt == 0)
+ fields[TypeFunc::Parms+0] = NULL; // void
+ else
+ fields[TypeFunc::Parms+0] = TypeInt::INT; // status result, if needed
+ const TypeTuple* range = TypeTuple::make(TypeFunc::Parms+retcnt, fields);
+ return TypeFunc::make(domain, range);
+}
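+// For example, ac_slow and ac_generic get the System.arraycopy-shaped domain
+// (src, src_pos, dest, dest_pos, length); ac_fast gets (src, dest, size) and
+// ac_checkcast gets (src, dest, size, size, super_klass), each size_t length
+// carrying an extra HALF slot on LP64.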
+
+const TypeFunc* OptoRuntime::fast_arraycopy_Type() {
+ // This signature is simple: Two base pointers and a size_t.
+ return make_arraycopy_Type(ac_fast);
+}
+
+const TypeFunc* OptoRuntime::checkcast_arraycopy_Type() {
+ // An extension of fast_arraycopy_Type which adds type checking.
+ return make_arraycopy_Type(ac_checkcast);
+}
+
+const TypeFunc* OptoRuntime::slow_arraycopy_Type() {
+ // This signature is exactly the same as System.arraycopy.
+ // There are no intptr_t (int/long) arguments.
+ return make_arraycopy_Type(ac_slow);
+}
+
+const TypeFunc* OptoRuntime::generic_arraycopy_Type() {
+ // This signature is like System.arraycopy, except that it returns status.
+ return make_arraycopy_Type(ac_generic);
+}
+
+
+//------------- Interpreter state access for on stack replacement
+const TypeFunc* OptoRuntime::osr_end_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // OSR temp buf
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ // fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // locked oop
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain, range);
+}
+
+//-------------- methodData update helpers
+
+const TypeFunc* OptoRuntime::profile_receiver_type_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeAryPtr::NOTNULL; // methodData pointer
+ fields[TypeFunc::Parms+1] = TypeInstPtr::BOTTOM; // receiver oop
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2, fields);
+
+ // create result type
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = NULL; // void
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms, fields);
+ return TypeFunc::make(domain,range);
+}
+
+JRT_LEAF(void, OptoRuntime::profile_receiver_type_C(DataLayout* data, oopDesc* receiver))
+ if (receiver == NULL) return;
+ klassOop receiver_klass = receiver->klass();
+
+ intptr_t* mdp = ((intptr_t*)(data)) + DataLayout::header_size_in_cells();
+ int empty_row = -1; // free row, if any is encountered
+
+ // ReceiverTypeData* vc = new ReceiverTypeData(mdp);
+ for (uint row = 0; row < ReceiverTypeData::row_limit(); row++) {
+ // if (vc->receiver(row) == receiver_klass)
+ int receiver_off = ReceiverTypeData::receiver_cell_index(row);
+ intptr_t row_recv = *(mdp + receiver_off);
+ if (row_recv == (intptr_t) receiver_klass) {
+ // vc->set_receiver_count(row, vc->receiver_count(row) + DataLayout::counter_increment);
+ int count_off = ReceiverTypeData::receiver_count_cell_index(row);
+ *(mdp + count_off) += DataLayout::counter_increment;
+ return;
+ } else if (row_recv == 0) {
+ // else if (vc->receiver(row) == NULL)
+ empty_row = (int) row;
+ }
+ }
+
+ if (empty_row != -1) {
+ int receiver_off = ReceiverTypeData::receiver_cell_index(empty_row);
+ // vc->set_receiver(empty_row, receiver_klass);
+ *(mdp + receiver_off) = (intptr_t) receiver_klass;
+ // vc->set_receiver_count(empty_row, DataLayout::counter_increment);
+ int count_off = ReceiverTypeData::receiver_count_cell_index(empty_row);
+ *(mdp + count_off) = DataLayout::counter_increment;
+ }
+JRT_END
+
+//-----------------------------------------------------------------------------
+// implicit exception support.
+
+static void report_null_exception_in_code_cache(address exception_pc) {
+ ResourceMark rm;
+ CodeBlob* n = CodeCache::find_blob(exception_pc);
+ if (n != NULL) {
+ tty->print_cr("#");
+ tty->print_cr("# HotSpot Runtime Error, null exception in generated code");
+ tty->print_cr("#");
+ tty->print_cr("# pc where exception happened = " INTPTR_FORMAT, exception_pc);
+
+ if (n->is_nmethod()) {
+ methodOop method = ((nmethod*)n)->method();
+ tty->print_cr("# Method where it happened %s.%s ", Klass::cast(method->method_holder())->name()->as_C_string(), method->name()->as_C_string());
+ tty->print_cr("#");
+ if (ShowMessageBoxOnError && UpdateHotSpotCompilerFileOnError) {
+ const char* title = "HotSpot Runtime Error";
+ const char* question = "Do you want to exclude compilation of this method in future runs?";
+ if (os::message_box(title, question)) {
+ CompilerOracle::append_comment_to_file("");
+ CompilerOracle::append_comment_to_file("Null exception in compiled code resulted in the following exclude");
+ CompilerOracle::append_comment_to_file("");
+ CompilerOracle::append_exclude_to_file(method);
+ tty->print_cr("#");
+ tty->print_cr("# %s has been updated to exclude the specified method", CompileCommandFile);
+ tty->print_cr("#");
+ }
+ }
+ fatal("Implicit null exception happened in compiled method");
+ } else {
+ n->print();
+ fatal("Implicit null exception happened in generated stub");
+ }
+ }
+ fatal("Implicit null exception at wrong place");
+}
+
+
+//-------------------------------------------------------------------------------------
+// register policy
+
+bool OptoRuntime::is_callee_saved_register(MachRegisterNumbers reg) {
+ assert(reg >= 0 && reg < _last_Mach_Reg, "must be a machine register");
+ switch (register_save_policy[reg]) {
+ case 'C': return false; //SOC
+ case 'E': return true ; //SOE
+ case 'N': return false; //NS
+ case 'A': return false; //AS
+ }
+ ShouldNotReachHere();
+ return false;
+}
+
+//-----------------------------------------------------------------------
+// Exceptions
+//
+
+static void trace_exception(oop exception_oop, address exception_pc, const char* msg) PRODUCT_RETURN;
+
+// This method is an entry that is always called by a C++ method, not directly
+// from compiled code; compiled code calls the C++ method that follows it.
+// We can't allow an async exception to be installed during exception processing.
+JRT_ENTRY_NO_ASYNC(address, OptoRuntime::handle_exception_C_helper(JavaThread* thread, nmethod* &nm))
+
+ // Do not confuse exception_oop with pending_exception. The exception_oop
+  // is only used to pass arguments into the method, not for general
+  // exception handling.  DO NOT CHANGE IT to use pending_exception, since
+  // the runtime stubs check this on exit.
+ assert(thread->exception_oop() != NULL, "exception oop is found");
+ address handler_address = NULL;
+
+ Handle exception(thread, thread->exception_oop());
+
+ if (TraceExceptions) {
+ trace_exception(exception(), thread->exception_pc(), "");
+ }
+ // for AbortVMOnException flag
+ NOT_PRODUCT(Exceptions::debug_check_abort(exception));
+
+ #ifdef ASSERT
+ if (!(exception->is_a(SystemDictionary::throwable_klass()))) {
+ // should throw an exception here
+ ShouldNotReachHere();
+ }
+ #endif
+
+
+ // new exception handling: this method is entered only from adapters
+ // exceptions from compiled java methods are handled in compiled code
+ // using rethrow node
+
+ address pc = thread->exception_pc();
+ nm = CodeCache::find_nmethod(pc);
+ assert(nm != NULL, "No NMethod found");
+ if (nm->is_native_method()) {
+    fatal("Native method should not have path to exception handling");
+ } else {
+    // we are switching to the old paradigm: search for the exception handler in
+    // caller_frame instead of in the exception handler of caller_frame.sender()
+
+ if (JvmtiExport::can_post_exceptions()) {
+ // "Full-speed catching" is not necessary here,
+ // since we're notifying the VM on every catch.
+ // Force deoptimization and the rest of the lookup
+ // will be fine.
+ deoptimize_caller_frame(thread, true);
+ }
+
+ // Check the stack guard pages. If enabled, look for handler in this frame;
+ // otherwise, forcibly unwind the frame.
+ //
+ // 4826555: use default current sp for reguard_stack instead of &nm: it's more accurate.
+ bool force_unwind = !thread->reguard_stack();
+ bool deopting = false;
+ if (nm->is_deopt_pc(pc)) {
+ deopting = true;
+ RegisterMap map(thread, false);
+ frame deoptee = thread->last_frame().sender(&map);
+ assert(deoptee.is_deoptimized_frame(), "must be deopted");
+ // Adjust the pc back to the original throwing pc
+ pc = deoptee.pc();
+ }
+
+ // If we are forcing an unwind because of stack overflow then deopt is
+    // irrelevant since we are throwing the frame away anyway.
+
+ if (deopting && !force_unwind) {
+ handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
+ } else {
+
+ handler_address =
+ force_unwind ? NULL : nm->handler_for_exception_and_pc(exception, pc);
+
+ if (handler_address == NULL) {
+ handler_address = SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true);
+ assert (handler_address != NULL, "must have compiled handler");
+ // Update the exception cache only when the unwind was not forced.
+ if (!force_unwind) {
+ nm->add_handler_for_exception_and_pc(exception,pc,handler_address);
+ }
+ } else {
+ assert(handler_address == SharedRuntime::compute_compiled_exc_handler(nm, pc, exception, force_unwind, true), "Must be the same");
+ }
+ }
+
+ thread->set_exception_pc(pc);
+ thread->set_exception_handler_pc(handler_address);
+ thread->set_exception_stack_size(0);
+ }
+
+ // Restore correct return pc. Was saved above.
+ thread->set_exception_oop(exception());
+ return handler_address;
+
+JRT_END
+
+// We are entering here from exception_blob
+// If there is a compiled exception handler in this method, we will continue there;
+// otherwise we will unwind the stack and continue at the caller of top frame method
+// Note we enter without the usual JRT wrapper. We will call a helper routine that
+// will do the normal VM entry. We do it this way so that we can see if the nmethod
+// we looked up the handler for has been deoptimized in the meantime. If it has been
+// we must not use the handler and instead return the deopt blob.
+address OptoRuntime::handle_exception_C(JavaThread* thread) {
+//
+// We are in Java not VM and in debug mode we have a NoHandleMark
+//
+#ifndef PRODUCT
+ SharedRuntime::_find_handler_ctr++; // find exception handler
+#endif
+ debug_only(NoHandleMark __hm;)
+ nmethod* nm = NULL;
+ address handler_address = NULL;
+ {
+ // Enter the VM
+
+ ResetNoHandleMark rnhm;
+ handler_address = handle_exception_C_helper(thread, nm);
+ }
+
+ // Back in java: Use no oops, DON'T safepoint
+
+ // Now check to see if the handler we are returning is in a now
+ // deoptimized frame
+
+ if (nm != NULL) {
+ RegisterMap map(thread, false);
+ frame caller = thread->last_frame().sender(&map);
+#ifdef ASSERT
+ assert(caller.is_compiled_frame(), "must be");
+#endif // ASSERT
+ if (caller.is_deoptimized_frame()) {
+ handler_address = SharedRuntime::deopt_blob()->unpack_with_exception();
+ }
+ }
+ return handler_address;
+}
+
+//------------------------------rethrow----------------------------------------
+// We get here after compiled code has executed a 'RethrowNode'. The callee
+// is either throwing or rethrowing an exception. The callee-save registers
+// have been restored, synchronized objects have been unlocked and the callee
+// stack frame has been removed. The return address was passed in.
+// Exception oop is passed as the 1st argument. This routine is then called
+// from the stub. On exit, we know where to jump in the caller's code.
+// After this C code exits, the stub will pop his frame and end in a jump
+// (instead of a return). We enter the caller's default handler.
+//
+// This must be JRT_LEAF:
+// - caller will not change its state as we cannot block on exit,
+// therefore raw_exception_handler_for_return_address is all it takes
+// to handle deoptimized blobs
+//
+// However, there needs to be a safepoint check in the middle! So compiled
+// safepoints are completely watertight.
+//
+// Thus, it cannot be a leaf since it contains the No_GC_Verifier.
+//
+// *THIS IS NOT RECOMMENDED PROGRAMMING STYLE*
+//
+address OptoRuntime::rethrow_C(oopDesc* exception, JavaThread* thread, address ret_pc) {
+#ifndef PRODUCT
+ SharedRuntime::_rethrow_ctr++; // count rethrows
+#endif
+  assert (exception != NULL, "should have thrown a NullPointerException");
+#ifdef ASSERT
+ if (!(exception->is_a(SystemDictionary::throwable_klass()))) {
+ // should throw an exception here
+ ShouldNotReachHere();
+ }
+#endif
+
+ thread->set_vm_result(exception);
+ // Frame not compiled (handles deoptimization blob)
+ return SharedRuntime::raw_exception_handler_for_return_address(ret_pc);
+}
+
+
+const TypeFunc *OptoRuntime::rethrow_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // Exception oop
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+1, fields);
+
+ return TypeFunc::make(domain, range);
+}
+
+
+void OptoRuntime::deoptimize_caller_frame(JavaThread *thread, bool doit) {
+ // Deoptimize frame
+ if (doit) {
+ // Called from within the owner thread, so no need for safepoint
+ RegisterMap reg_map(thread);
+ frame stub_frame = thread->last_frame();
+ assert(stub_frame.is_runtime_frame() || exception_blob()->contains(stub_frame.pc()), "sanity check");
+ frame caller_frame = stub_frame.sender(&reg_map);
+
+ VM_DeoptimizeFrame deopt(thread, caller_frame.id());
+ VMThread::execute(&deopt);
+ }
+}
+
+
+const TypeFunc *OptoRuntime::register_finalizer_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(1);
+ fields[TypeFunc::Parms+0] = TypeInstPtr::NOTNULL; // oop; Receiver
+ // // The JavaThread* is passed to each routine as the last argument
+ // fields[TypeFunc::Parms+1] = TypeRawPtr::NOTNULL; // JavaThread *; Executing thread
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+1,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+//-----------------------------------------------------------------------------
+// Dtrace support. entry and exit probes have the same signature
+const TypeFunc *OptoRuntime::dtrace_method_entry_exit_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
+ fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // methodOop; Method we are entering
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+const TypeFunc *OptoRuntime::dtrace_object_alloc_Type() {
+ // create input type (domain)
+ const Type **fields = TypeTuple::fields(2);
+ fields[TypeFunc::Parms+0] = TypeRawPtr::BOTTOM; // Thread-local storage
+ fields[TypeFunc::Parms+1] = TypeInstPtr::NOTNULL; // oop; newly allocated object
+
+ const TypeTuple *domain = TypeTuple::make(TypeFunc::Parms+2,fields);
+
+ // create result type (range)
+ fields = TypeTuple::fields(0);
+
+ const TypeTuple *range = TypeTuple::make(TypeFunc::Parms+0,fields);
+
+ return TypeFunc::make(domain,range);
+}
+
+
+JRT_ENTRY_NO_ASYNC(void, OptoRuntime::register_finalizer(oopDesc* obj, JavaThread* thread))
+ assert(obj->is_oop(), "must be a valid oop");
+ assert(obj->klass()->klass_part()->has_finalizer(), "shouldn't be here otherwise");
+ instanceKlass::register_finalizer(instanceOop(obj), CHECK);
+JRT_END
+
+//-----------------------------------------------------------------------------
+
+NamedCounter * volatile OptoRuntime::_named_counters = NULL;
+
+//
+// dump the collected NamedCounters.
+//
+void OptoRuntime::print_named_counters() {
+ int total_lock_count = 0;
+ int eliminated_lock_count = 0;
+
+ NamedCounter* c = _named_counters;
+ while (c) {
+ if (c->tag() == NamedCounter::LockCounter || c->tag() == NamedCounter::EliminatedLockCounter) {
+ int count = c->count();
+ if (count > 0) {
+ bool eliminated = c->tag() == NamedCounter::EliminatedLockCounter;
+ if (Verbose) {
+ tty->print_cr("%d %s%s", count, c->name(), eliminated ? " (eliminated)" : "");
+ }
+ total_lock_count += count;
+ if (eliminated) {
+ eliminated_lock_count += count;
+ }
+ }
+ } else if (c->tag() == NamedCounter::BiasedLockingCounter) {
+ BiasedLockingCounters* blc = ((BiasedLockingNamedCounter*)c)->counters();
+ if (blc->nonzero()) {
+ tty->print_cr("%s", c->name());
+ blc->print_on(tty);
+ }
+ }
+ c = c->next();
+ }
+ if (total_lock_count > 0) {
+ tty->print_cr("dynamic locks: %d", total_lock_count);
+ if (eliminated_lock_count) {
+ tty->print_cr("eliminated locks: %d (%d%%)", eliminated_lock_count,
+ (int)(eliminated_lock_count * 100.0 / total_lock_count));
+ }
+ }
+}
+
+//
+// Allocate a new NamedCounter. The JVMState is used to generate the
+// name, which consists of method@bci entries for the inlining tree.
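+// For example, a counter for a lock one level deep in the inlining tree gets a
+// name like "Inner.callee@3 Outer.caller@12" -- one holder.method@bci entry per
+// inlined scope, youngest first.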
+//
+
+NamedCounter* OptoRuntime::new_named_counter(JVMState* youngest_jvms, NamedCounter::CounterTag tag) {
+ int max_depth = youngest_jvms->depth();
+
+ // Visit scopes from youngest to oldest.
+ bool first = true;
+ stringStream st;
+ for (int depth = max_depth; depth >= 1; depth--) {
+ JVMState* jvms = youngest_jvms->of_depth(depth);
+ ciMethod* m = jvms->has_method() ? jvms->method() : NULL;
+ if (!first) {
+ st.print(" ");
+ } else {
+ first = false;
+ }
+ int bci = jvms->bci();
+ if (bci < 0) bci = 0;
+ st.print("%s.%s@%d", m->holder()->name()->as_utf8(), m->name()->as_utf8(), bci);
+ // To print linenumbers instead of bci use: m->line_number_from_bci(bci)
+ }
+ NamedCounter* c;
+ if (tag == NamedCounter::BiasedLockingCounter) {
+ c = new BiasedLockingNamedCounter(strdup(st.as_string()));
+ } else {
+ c = new NamedCounter(strdup(st.as_string()), tag);
+ }
+
+ // atomically add the new counter to the head of the list. We only
+ // add counters so this is safe.
+ NamedCounter* head;
+ do {
+ head = _named_counters;
+ c->set_next(head);
+ } while (Atomic::cmpxchg_ptr(c, &_named_counters, head) != head);
+ return c;
+}
+
+//-----------------------------------------------------------------------------
+// Non-product code
+#ifndef PRODUCT
+
+int trace_exception_counter = 0;
+static void trace_exception(oop exception_oop, address exception_pc, const char* msg) {
+ ttyLocker ttyl;
+ trace_exception_counter++;
+ tty->print("%d [Exception (%s): ", trace_exception_counter, msg);
+ exception_oop->print_value();
+ tty->print(" in ");
+ CodeBlob* blob = CodeCache::find_blob(exception_pc);
+ if (blob->is_nmethod()) {
+ ((nmethod*)blob)->method()->print_value();
+ } else if (blob->is_runtime_stub()) {
+ tty->print("<runtime-stub>");
+ } else {
+ tty->print("<unknown>");
+ }
+ tty->print(" at " INTPTR_FORMAT, exception_pc);
+ tty->print_cr("]");
+}
+
+#endif // PRODUCT
+
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+// Called from call sites in compiled code with oop maps (actually safepoints)
+// Zaps dead locals in first java frame.
+// Is entry because may need to lock to generate oop maps
+// Currently, only used for compiler frames, but someday may be used
+// for interpreter frames, too.
+
+int OptoRuntime::ZapDeadCompiledLocals_count = 0;
+
+// avoid pointers to member funcs with these helpers
+static bool is_java_frame( frame* f) { return f->is_java_frame(); }
+static bool is_native_frame(frame* f) { return f->is_native_frame(); }
+
+
+void OptoRuntime::zap_dead_java_or_native_locals(JavaThread* thread,
+ bool (*is_this_the_right_frame_to_zap)(frame*)) {
+ assert(JavaThread::current() == thread, "is this needed?");
+
+ if ( !ZapDeadCompiledLocals ) return;
+
+ bool skip = false;
+
+ if ( ZapDeadCompiledLocalsFirst == 0 ) ; // nothing special
+ else if ( ZapDeadCompiledLocalsFirst > ZapDeadCompiledLocals_count ) skip = true;
+ else if ( ZapDeadCompiledLocalsFirst == ZapDeadCompiledLocals_count )
+ warning("starting zapping after skipping");
+
+ if ( ZapDeadCompiledLocalsLast == -1 ) ; // nothing special
+ else if ( ZapDeadCompiledLocalsLast < ZapDeadCompiledLocals_count ) skip = true;
+ else if ( ZapDeadCompiledLocalsLast == ZapDeadCompiledLocals_count )
+ warning("about to zap last zap");
+
+ ++ZapDeadCompiledLocals_count; // counts skipped zaps, too
+
+ if ( skip ) return;
+
+ // find java frame and zap it
+
+ for (StackFrameStream sfs(thread); !sfs.is_done(); sfs.next()) {
+ if (is_this_the_right_frame_to_zap(sfs.current()) ) {
+ sfs.current()->zap_dead_locals(thread, sfs.register_map());
+ return;
+ }
+ }
+ warning("no frame found to zap in zap_dead_Java_locals_C");
+}
+
+JRT_LEAF(void, OptoRuntime::zap_dead_Java_locals_C(JavaThread* thread))
+ zap_dead_java_or_native_locals(thread, is_java_frame);
+JRT_END
+
+// The following does not work because, for one thing, the
+// thread state is wrong; it expects java, but it is native.
+// Also, the invariants in a native stub are different and
+// I'm not sure it is safe to have a MachCallRuntimeDirectNode
+// in there.
+// So for now, we do not zap in native stubs.
+
+JRT_LEAF(void, OptoRuntime::zap_dead_native_locals_C(JavaThread* thread))
+ zap_dead_java_or_native_locals(thread, is_native_frame);
+JRT_END
+
+# endif
diff --git a/src/share/vm/opto/runtime.hpp b/src/share/vm/opto/runtime.hpp
new file mode 100644
index 000000000..50f117120
--- /dev/null
+++ b/src/share/vm/opto/runtime.hpp
@@ -0,0 +1,289 @@
+/*
+ * Copyright 1998-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+//------------------------------OptoRuntime------------------------------------
+// Opto compiler runtime routines
+//
+// These are all generated from Ideal graphs. They are called with the
+// Java calling convention. Internally they call C++. They are made once at
+// startup time and Opto compiles calls to them later.
+// Things are broken up into quads: the signature they will be called with,
+// the address of the generated code, the corresponding C++ code and an
+// nmethod.
+
+// The signature (returned by "xxx_Type()") is used at startup time by the
+// Generator to make the generated code "xxx_Java". Opto compiles calls
+// to the generated code "xxx_Java". When the compiled code gets executed,
+// it calls the C++ code "xxx_C". The generated nmethod is saved in the
+// CodeCache. Exception handlers use the nmethod to get the callee-save
+// register OopMaps.
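+//
+// For example, for the slow-path allocation entry declared below:
+// new_instance_Type() gives the signature, the generated stub is kept in
+// _new_instance_Java (returned by new_instance_Java()), and the stub calls
+// the C++ slow path new_instance_C().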
+class CallInfo;
+
+//
+// NamedCounters are tagged counters which can be used for profiling
+// code in various ways. Currently they are used by the lock coarsening code
+//
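+// Rough flow (pieced together from the declarations below): new_named_counter()
+// allocates a counter and links it onto _named_counters, code can bump the int
+// exposed via addr(), and print_named_counters() dumps the totals.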
+
+class NamedCounter : public CHeapObj {
+public:
+ enum CounterTag {
+ NoTag,
+ LockCounter,
+ EliminatedLockCounter,
+ BiasedLockingCounter
+ };
+
+private:
+ const char * _name;
+ int _count;
+ CounterTag _tag;
+ NamedCounter* _next;
+
+ public:
+ NamedCounter(const char *n, CounterTag tag = NoTag):
+ _name(n),
+ _count(0),
+ _next(NULL),
+ _tag(tag) {}
+
+ const char * name() const { return _name; }
+ int count() const { return _count; }
+ address addr() { return (address)&_count; }
+ CounterTag tag() const { return _tag; }
+ void set_tag(CounterTag tag) { _tag = tag; }
+
+ NamedCounter* next() const { return _next; }
+ void set_next(NamedCounter* next) {
+ assert(_next == NULL, "already set");
+ _next = next;
+ }
+
+};
+
+class BiasedLockingNamedCounter : public NamedCounter {
+ private:
+ BiasedLockingCounters _counters;
+
+ public:
+ BiasedLockingNamedCounter(const char *n) :
+ NamedCounter(n, BiasedLockingCounter), _counters() {}
+
+ BiasedLockingCounters* counters() { return &_counters; }
+};
+
+typedef const TypeFunc*(*TypeFunc_generator)();
+
+class OptoRuntime : public AllStatic {
+ friend class Matcher; // allow access to stub names
+
+ private:
+ // define stubs
+ static address generate_stub(ciEnv* ci_env, TypeFunc_generator gen, address C_function, const char *name, int is_fancy_jump, bool pass_tls, bool save_arguments, bool return_pc);
+
+ // References to generated stubs
+ static address _new_instance_Java;
+ static address _new_array_Java;
+ static address _multianewarray2_Java;
+ static address _multianewarray3_Java;
+ static address _multianewarray4_Java;
+ static address _multianewarray5_Java;
+ static address _vtable_must_compile_Java;
+ static address _complete_monitor_locking_Java;
+ static address _rethrow_Java;
+
+ static address _slow_arraycopy_Java;
+ static address _register_finalizer_Java;
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static address _zap_dead_Java_locals_Java;
+ static address _zap_dead_native_locals_Java;
+# endif
+
+
+ //
+ // Implementation of runtime methods
+ // =================================
+
+ // Allocate storage for a Java instance.
+ static void new_instance_C(klassOopDesc* instance_klass, JavaThread *thread);
+
+  // Allocate storage for an objArray or typeArray
+ static void new_array_C(klassOopDesc* array_klass, int len, JavaThread *thread);
+
+ // Post-allocation step for implementing ReduceInitialCardMarks:
+ static void do_eager_card_mark(JavaThread* thread);
+
+  // Allocate storage for a multi-dimensional array
+ // Note: needs to be fixed for arbitrary number of dimensions
+ static void multianewarray2_C(klassOopDesc* klass, int len1, int len2, JavaThread *thread);
+ static void multianewarray3_C(klassOopDesc* klass, int len1, int len2, int len3, JavaThread *thread);
+ static void multianewarray4_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, JavaThread *thread);
+ static void multianewarray5_C(klassOopDesc* klass, int len1, int len2, int len3, int len4, int len5, JavaThread *thread);
+
+public:
+ // Slow-path Locking and Unlocking
+ static void complete_monitor_locking_C(oopDesc* obj, BasicLock* lock, JavaThread* thread);
+ static void complete_monitor_unlocking_C(oopDesc* obj, BasicLock* lock);
+
+private:
+
+ // Implicit exception support
+ static void throw_null_exception_C(JavaThread* thread);
+
+ // Exception handling
+ static address handle_exception_C (JavaThread* thread);
+ static address handle_exception_C_helper(JavaThread* thread, nmethod*& nm);
+ static address rethrow_C (oopDesc* exception, JavaThread *thread, address return_pc );
+ static void deoptimize_caller_frame (JavaThread *thread, bool doit);
+
+ // CodeBlob support
+ // ===================================================================
+
+ static ExceptionBlob* _exception_blob;
+ static void generate_exception_blob();
+
+ static void register_finalizer(oopDesc* obj, JavaThread* thread);
+
+  // zapping dead locals, either from Java frames or from native frames
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static void zap_dead_Java_locals_C( JavaThread* thread);
+ static void zap_dead_native_locals_C( JavaThread* thread);
+
+ static void zap_dead_java_or_native_locals( JavaThread*, bool (*)(frame*));
+
+ public:
+ static int ZapDeadCompiledLocals_count;
+
+# endif
+
+
+ public:
+
+ static bool is_callee_saved_register(MachRegisterNumbers reg);
+
+ // One time only generate runtime code stubs
+ static void generate(ciEnv* env);
+
+ // Returns the name of a stub
+ static const char* stub_name(address entry);
+
+ // access to runtime stubs entry points for java code
+ static address new_instance_Java() { return _new_instance_Java; }
+ static address new_array_Java() { return _new_array_Java; }
+ static address multianewarray2_Java() { return _multianewarray2_Java; }
+ static address multianewarray3_Java() { return _multianewarray3_Java; }
+ static address multianewarray4_Java() { return _multianewarray4_Java; }
+ static address multianewarray5_Java() { return _multianewarray5_Java; }
+ static address vtable_must_compile_stub() { return _vtable_must_compile_Java; }
+ static address complete_monitor_locking_Java() { return _complete_monitor_locking_Java; }
+
+ static address slow_arraycopy_Java() { return _slow_arraycopy_Java; }
+ static address register_finalizer_Java() { return _register_finalizer_Java; }
+
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static address zap_dead_locals_stub(bool is_native) { return is_native
+ ? _zap_dead_native_locals_Java
+ : _zap_dead_Java_locals_Java; }
+ static MachNode* node_to_call_zap_dead_locals(Node* n, int block_num, bool is_native);
+# endif
+
+ static ExceptionBlob* exception_blob() { return _exception_blob; }
+
+ // Leaf routines helping with method data update
+ static void profile_receiver_type_C(DataLayout* data, oopDesc* receiver);
+
+ // Implicit exception support
+ static void throw_div0_exception_C (JavaThread* thread);
+ static void throw_stack_overflow_error_C(JavaThread* thread);
+
+ // Exception handling
+ static address rethrow_stub() { return _rethrow_Java; }
+
+
+ // Type functions
+ // ======================================================
+
+ static const TypeFunc* new_instance_Type(); // object allocation (slow case)
+ static const TypeFunc* new_array_Type (); // [a]newarray (slow case)
+ static const TypeFunc* multianewarray_Type(int ndim); // multianewarray
+ static const TypeFunc* multianewarray2_Type(); // multianewarray
+ static const TypeFunc* multianewarray3_Type(); // multianewarray
+ static const TypeFunc* multianewarray4_Type(); // multianewarray
+ static const TypeFunc* multianewarray5_Type(); // multianewarray
+ static const TypeFunc* complete_monitor_enter_Type();
+ static const TypeFunc* complete_monitor_exit_Type();
+ static const TypeFunc* uncommon_trap_Type();
+ static const TypeFunc* athrow_Type();
+ static const TypeFunc* rethrow_Type();
+ static const TypeFunc* Math_D_D_Type(); // sin,cos & friends
+ static const TypeFunc* Math_DD_D_Type(); // mod,pow & friends
+ static const TypeFunc* modf_Type();
+ static const TypeFunc* l2f_Type();
+ static const TypeFunc* current_time_millis_Type();
+
+ static const TypeFunc* flush_windows_Type();
+
+ // arraycopy routine types
+ static const TypeFunc* fast_arraycopy_Type(); // bit-blasters
+ static const TypeFunc* checkcast_arraycopy_Type();
+ static const TypeFunc* generic_arraycopy_Type();
+ static const TypeFunc* slow_arraycopy_Type(); // the full routine
+
+ // leaf on stack replacement interpreter accessor types
+ static const TypeFunc* osr_end_Type();
+
+ // leaf methodData routine types
+ static const TypeFunc* profile_receiver_type_Type();
+
+ // leaf on stack replacement interpreter accessor types
+ static const TypeFunc* fetch_int_Type();
+ static const TypeFunc* fetch_long_Type();
+ static const TypeFunc* fetch_float_Type();
+ static const TypeFunc* fetch_double_Type();
+ static const TypeFunc* fetch_oop_Type();
+ static const TypeFunc* fetch_monitor_Type();
+
+ static const TypeFunc* register_finalizer_Type();
+
+ // Dtrace support
+ static const TypeFunc* dtrace_method_entry_exit_Type();
+ static const TypeFunc* dtrace_object_alloc_Type();
+
+# ifdef ENABLE_ZAP_DEAD_LOCALS
+ static const TypeFunc* zap_dead_locals_Type();
+# endif
+
+ private:
+ static NamedCounter * volatile _named_counters;
+
+ public:
+  // helper function which creates a named counter labeled with the
+  // method and bci of each calling scope, if they are available
+ static NamedCounter* new_named_counter(JVMState* jvms, NamedCounter::CounterTag tag);
+
+ // dumps all the named counters
+ static void print_named_counters();
+
+};
diff --git a/src/share/vm/opto/split_if.cpp b/src/share/vm/opto/split_if.cpp
new file mode 100644
index 000000000..130b26675
--- /dev/null
+++ b/src/share/vm/opto/split_if.cpp
@@ -0,0 +1,536 @@
+/*
+ * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_split_if.cpp.incl"
+
+
+//------------------------------split_thru_region------------------------------
+// Split Node 'n' through merge point.
+Node *PhaseIdealLoop::split_thru_region( Node *n, Node *region ) {
+ uint wins = 0;
+ assert( n->is_CFG(), "" );
+ assert( region->is_Region(), "" );
+ Node *r = new (C, region->req()) RegionNode( region->req() );
+ IdealLoopTree *loop = get_loop( n );
+ for( uint i = 1; i < region->req(); i++ ) {
+ Node *x = n->clone();
+ Node *in0 = n->in(0);
+ if( in0->in(0) == region ) x->set_req( 0, in0->in(i) );
+ for( uint j = 1; j < n->req(); j++ ) {
+ Node *in = n->in(j);
+ if( get_ctrl(in) == region )
+ x->set_req( j, in->in(i) );
+ }
+ _igvn.register_new_node_with_optimizer(x);
+ set_loop(x, loop);
+ set_idom(x, x->in(0), dom_depth(x->in(0))+1);
+ r->init_req(i, x);
+ }
+
+ // Record region
+ r->set_req(0,region); // Not a TRUE RegionNode
+ _igvn.register_new_node_with_optimizer(r);
+ set_loop(r, loop);
+ if( !loop->_child )
+ loop->_body.push(r);
+ return r;
+}
+
+//------------------------------split_up---------------------------------------
+// Split block-local op up through the phis to empty the current block
+bool PhaseIdealLoop::split_up( Node *n, Node *blk1, Node *blk2 ) {
+ if( n->is_CFG() ) {
+ assert( n->in(0) != blk1, "Lousy candidate for split-if" );
+ return false;
+ }
+ if( get_ctrl(n) != blk1 && get_ctrl(n) != blk2 )
+ return false; // Not block local
+ if( n->is_Phi() ) return false; // Local PHIs are expected
+
+ // Recursively split-up inputs
+ for (uint i = 1; i < n->req(); i++) {
+ if( split_up( n->in(i), blk1, blk2 ) ) {
+ // Got split recursively and self went dead?
+ if (n->outcnt() == 0)
+ _igvn.remove_dead_node(n);
+ return true;
+ }
+ }
+
+ // Check for needing to clone-up a compare. Can't do that, it forces
+ // another (nested) split-if transform. Instead, clone it "down".
+ if( n->is_Cmp() ) {
+ assert(get_ctrl(n) == blk2 || get_ctrl(n) == blk1, "must be in block with IF");
+ // Check for simple Cmp/Bool/CMove which we can clone-up. Cmp/Bool/CMove
+ // sequence can have no other users and it must all reside in the split-if
+ // block. Non-simple Cmp/Bool/CMove sequences are 'cloned-down' below -
+ // private, per-use versions of the Cmp and Bool are made. These sink to
+ // the CMove block. If the CMove is in the split-if block, then in the
+ // next iteration this will become a simple Cmp/Bool/CMove set to clone-up.
+ Node *bol, *cmov;
+ if( !(n->outcnt() == 1 && n->unique_out()->is_Bool() &&
+ (bol = n->unique_out()->as_Bool()) &&
+ (get_ctrl(bol) == blk1 ||
+ get_ctrl(bol) == blk2) &&
+ bol->outcnt() == 1 &&
+ bol->unique_out()->is_CMove() &&
+ (cmov = bol->unique_out()->as_CMove()) &&
+ (get_ctrl(cmov) == blk1 ||
+ get_ctrl(cmov) == blk2) ) ) {
+
+ // Must clone down
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning down: ");
+ n->dump();
+ }
+#endif
+ // Clone down any block-local BoolNode uses of this CmpNode
+ for (DUIterator i = n->outs(); n->has_out(i); i++) {
+ Node* bol = n->out(i);
+ assert( bol->is_Bool(), "" );
+ if (bol->outcnt() == 1) {
+ Node* use = bol->unique_out();
+ Node *use_c = use->is_If() ? use->in(0) : get_ctrl(use);
+ if (use_c == blk1 || use_c == blk2) {
+ continue;
+ }
+ }
+ if (get_ctrl(bol) == blk1 || get_ctrl(bol) == blk2) {
+ // Recursively sink any BoolNode
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning down: ");
+ bol->dump();
+ }
+#endif
+ for (DUIterator_Last jmin, j = bol->last_outs(jmin); j >= jmin; --j) {
+ // Uses are either IfNodes or CMoves
+ Node* iff = bol->last_out(j);
+ assert( iff->in(1) == bol, "" );
+ // Get control block of either the CMove or the If input
+ Node *iff_ctrl = iff->is_If() ? iff->in(0) : get_ctrl(iff);
+ Node *x = bol->clone();
+ register_new_node(x, iff_ctrl);
+ _igvn.hash_delete(iff);
+ iff->set_req(1, x);
+ _igvn._worklist.push(iff);
+ }
+ _igvn.remove_dead_node( bol );
+ --i;
+ }
+ }
+ // Clone down this CmpNode
+ for (DUIterator_Last jmin, j = n->last_outs(jmin); j >= jmin; --j) {
+ Node* bol = n->last_out(j);
+ assert( bol->in(1) == n, "" );
+ Node *x = n->clone();
+ register_new_node(x, get_ctrl(bol));
+ _igvn.hash_delete(bol);
+ bol->set_req(1, x);
+ _igvn._worklist.push(bol);
+ }
+ _igvn.remove_dead_node( n );
+
+ return true;
+ }
+ }
+
+  // See if we are splitting up a Store. Any anti-dep loads must go up as
+  // well. An anti-dep load might be in the wrong block, because in
+  // this particular layout/schedule we ignored anti-deps and allow
+  // memory to be alive twice. This only works if we do the same
+  // operations on anti-dep loads as we do on their killing stores.
+ if( n->is_Store() && n->in(MemNode::Memory)->in(0) == n->in(0) ) {
+ // Get store's memory slice
+ int alias_idx = C->get_alias_index(_igvn.type(n->in(MemNode::Address))->is_ptr());
+
+ // Get memory-phi anti-dep loads will be using
+ Node *memphi = n->in(MemNode::Memory);
+ assert( memphi->is_Phi(), "" );
+ // Hoist any anti-dep load to the splitting block;
+ // it will then "split-up".
+ for (DUIterator_Fast imax,i = memphi->fast_outs(imax); i < imax; i++) {
+ Node *load = memphi->fast_out(i);
+ if( load->is_Load() && alias_idx == C->get_alias_index(_igvn.type(load->in(MemNode::Address))->is_ptr()) )
+ set_ctrl(load,blk1);
+ }
+ }
+
+ // Found some other Node; must clone it up
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations ) {
+ tty->print("Cloning up: ");
+ n->dump();
+ }
+#endif
+
+ // Now actually split-up this guy. One copy per control path merging.
+ Node *phi = PhiNode::make_blank(blk1, n);
+ for( uint j = 1; j < blk1->req(); j++ ) {
+ Node *x = n->clone();
+ if( n->in(0) && n->in(0) == blk1 )
+ x->set_req( 0, blk1->in(j) );
+ for( uint i = 1; i < n->req(); i++ ) {
+ Node *m = n->in(i);
+ if( get_ctrl(m) == blk1 ) {
+ assert( m->in(0) == blk1, "" );
+ x->set_req( i, m->in(j) );
+ }
+ }
+ register_new_node( x, blk1->in(j) );
+ phi->init_req( j, x );
+ }
+ // Announce phi to optimizer
+ register_new_node(phi, blk1);
+
+ // Remove cloned-up value from optimizer; use phi instead
+ _igvn.hash_delete(n);
+ _igvn.subsume_node( n, phi );
+
+ // (There used to be a self-recursive call to split_up() here,
+ // but it is not needed. All necessary forward walking is done
+ // by do_split_if() below.)
+
+ return true;
+}
+
+//------------------------------register_new_node------------------------------
+void PhaseIdealLoop::register_new_node( Node *n, Node *blk ) {
+ _igvn.register_new_node_with_optimizer(n);
+ set_ctrl(n, blk);
+ IdealLoopTree *loop = get_loop(blk);
+ if( !loop->_child )
+ loop->_body.push(n);
+}
+
+//------------------------------small_cache------------------------------------
+struct small_cache : public Dict {
+
+ small_cache() : Dict( cmpkey, hashptr ) {}
+ Node *probe( Node *use_blk ) { return (Node*)((*this)[use_blk]); }
+ void lru_insert( Node *use_blk, Node *new_def ) { Insert(use_blk,new_def); }
+};
+
+//------------------------------spinup-----------------------------------------
+// "Spin up" the dominator tree, starting at the use site and stopping when we
+// find the post-dominating point.
+
+// We must be at the merge point which post-dominates 'new_false' and
+// 'new_true'. Figure out which edges into the RegionNode eventually lead up
+// to false and which to true. Put in a PhiNode to merge values; plug in
+// the appropriate false-arm or true-arm values. If some path leads to the
+// original IF, then insert a Phi recursively.
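+//
+// Rough picture: walk idom() upward from the use block; if the walk enters
+// through 'new_false' or 'new_true', a clone of the def is pinned on that arm,
+// otherwise a merge point was reached and a Phi (or the existing Region, for
+// control) supplies the value, with its inputs found by spinning up each
+// predecessor in turn.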
+Node *PhaseIdealLoop::spinup( Node *iff_dom, Node *new_false, Node *new_true, Node *use_blk, Node *def, small_cache *cache ) {
+ if (use_blk->is_top()) // Handle dead uses
+ return use_blk;
+ Node *prior_n = (Node*)0xdeadbeef;
+ Node *n = use_blk; // Get path input
+ assert( use_blk != iff_dom, "" );
+  // Here is the "spin up the dominator tree" loop. Do a cache-check
+ // along the way, in case we've come this way before.
+ while( n != iff_dom ) { // Found post-dominating point?
+ prior_n = n;
+ n = idom(n); // Search higher
+ Node *s = cache->probe( prior_n ); // Check cache
+ if( s ) return s; // Cache hit!
+ }
+
+ Node *phi_post;
+ if( prior_n == new_false || prior_n == new_true ) {
+ phi_post = def->clone();
+ phi_post->set_req(0, prior_n );
+ register_new_node(phi_post, prior_n);
+ } else {
+    // This method handles both control uses (looking for Regions) and data
+ // uses (looking for Phis). If looking for a control use, then we need
+ // to insert a Region instead of a Phi; however Regions always exist
+ // previously (the hash_find_insert below would always hit) so we can
+ // return the existing Region.
+ if( def->is_CFG() ) {
+ phi_post = prior_n; // If looking for CFG, return prior
+ } else {
+ assert( def->is_Phi(), "" );
+ assert( prior_n->is_Region(), "must be a post-dominating merge point" );
+
+ // Need a Phi here
+ phi_post = PhiNode::make_blank(prior_n, def);
+ // Search for both true and false on all paths till find one.
+ for( uint i = 1; i < phi_post->req(); i++ ) // For all paths
+ phi_post->init_req( i, spinup( iff_dom, new_false, new_true, prior_n->in(i), def, cache ) );
+ Node *t = _igvn.hash_find_insert(phi_post);
+ if( t ) { // See if we already have this one
+ // phi_post will not be used, so kill it
+ _igvn.remove_dead_node(phi_post);
+ phi_post->destruct();
+ phi_post = t;
+ } else {
+ register_new_node( phi_post, prior_n );
+ }
+ }
+ }
+
+ // Update cache everywhere
+ prior_n = (Node*)0xdeadbeef; // Reset IDOM walk
+ n = use_blk; // Get path input
+ // Spin-up the idom tree again, basically doing path-compression.
+ // Insert cache entries along the way, so that if we ever hit this
+ // point in the IDOM tree again we'll stop immediately on a cache hit.
+ while( n != iff_dom ) { // Found post-dominating point?
+ prior_n = n;
+ n = idom(n); // Search higher
+ cache->lru_insert( prior_n, phi_post ); // Fill cache
+ } // End of while not gone high enough
+
+ return phi_post;
+}
+
+//------------------------------find_use_block---------------------------------
+// Find the block a USE is in. Normally USEs are in the same block as the
+// using instruction. For Phi USEs, the USE is in the predecessor block
+// along the corresponding path.
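+// (Concretely: if the def feeds Phi input j, the use block is taken to be
+// use->in(0)->in(j), the predecessor on that path.)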
+Node *PhaseIdealLoop::find_use_block( Node *use, Node *def, Node *old_false, Node *new_false, Node *old_true, Node *new_true ) {
+ // CFG uses are their own block
+ if( use->is_CFG() )
+ return use;
+
+ if( use->is_Phi() ) { // Phi uses in prior block
+ // Grab the first Phi use; there may be many.
+    // Each will be handled as a separate iteration of
+ // the "while( phi->outcnt() )" loop.
+ uint j;
+ for( j = 1; j < use->req(); j++ )
+ if( use->in(j) == def )
+ break;
+ assert( j < use->req(), "def should be among use's inputs" );
+ return use->in(0)->in(j);
+ }
+ // Normal (non-phi) use
+ Node *use_blk = get_ctrl(use);
+ // Some uses are directly attached to the old (and going away)
+ // false and true branches.
+ if( use_blk == old_false ) {
+ use_blk = new_false;
+ set_ctrl(use, new_false);
+ }
+ if( use_blk == old_true ) {
+ use_blk = new_true;
+ set_ctrl(use, new_true);
+ }
+
+ if (use_blk == NULL) { // He's dead, Jim
+ _igvn.hash_delete(use);
+ _igvn.subsume_node(use, C->top());
+ }
+
+ return use_blk;
+}
+
+//------------------------------handle_use-------------------------------------
+// Handle uses of the merge point. Basically, split-if makes the merge point
+// go away so all uses of the merge point must go away as well. Most block
+// local uses have already been split-up, through the merge point. Uses from
+// far below the merge point can't always be split up (e.g., phi-uses are
+// pinned) and it makes too much stuff live. Instead we use a path-based
+// solution to move uses down.
+//
+// If the use is along the pre-split-CFG true branch, then the new use will
+// be from the post-split-CFG true merge point. Vice-versa for the false
+// path. Some uses will be along both paths; then we sink the use to the
+// post-dominating location; we may need to insert a Phi there.
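+//
+// For instance (purely illustrative), a use reachable only along the old true
+// path ends up reading the true-arm value, while a use below the
+// post-dominating merge reads a freshly inserted Phi of the two arms.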
+void PhaseIdealLoop::handle_use( Node *use, Node *def, small_cache *cache, Node *region_dom, Node *new_false, Node *new_true, Node *old_false, Node *old_true ) {
+
+ Node *use_blk = find_use_block(use,def,old_false,new_false,old_true,new_true);
+ if( !use_blk ) return; // He's dead, Jim
+
+ // Walk up the dominator tree until I hit either the old IfFalse, the old
+ // IfTrue or the old If. Insert Phis where needed.
+ Node *new_def = spinup( region_dom, new_false, new_true, use_blk, def, cache );
+
+ // Found where this USE goes. Re-point him.
+ uint i;
+ for( i = 0; i < use->req(); i++ )
+ if( use->in(i) == def )
+ break;
+ assert( i < use->req(), "def should be among use's inputs" );
+ _igvn.hash_delete(use);
+ use->set_req(i, new_def);
+ _igvn._worklist.push(use);
+}
+
+//------------------------------do_split_if------------------------------------
+// Found an If getting its condition-code input from a Phi in the same block.
+// Split thru the Region.
+void PhaseIdealLoop::do_split_if( Node *iff ) {
+#ifndef PRODUCT
+ if( PrintOpto && VerifyLoopOptimizations )
+ tty->print_cr("Split-if");
+#endif
+ C->set_major_progress();
+ Node *region = iff->in(0);
+ Node *region_dom = idom(region);
+
+ // We are going to clone this test (and the control flow with it) up through
+ // the incoming merge point. We need to empty the current basic block.
+ // Clone any instructions which must be in this block up through the merge
+ // point.
+ DUIterator i, j;
+ bool progress = true;
+ while (progress) {
+ progress = false;
+ for (i = region->outs(); region->has_out(i); i++) {
+ Node* n = region->out(i);
+ if( n == region ) continue;
+ // The IF to be split is OK.
+ if( n == iff ) continue;
+ if( !n->is_Phi() ) { // Found pinned memory op or such
+ if (split_up(n, region, iff)) {
+ i = region->refresh_out_pos(i);
+ progress = true;
+ }
+ continue;
+ }
+ assert( n->in(0) == region, "" );
+
+ // Recursively split up all users of a Phi
+ for (j = n->outs(); n->has_out(j); j++) {
+ Node* m = n->out(j);
+ // If m is dead, throw it away, and declare progress
+ if (_nodes[m->_idx] == NULL) {
+ _igvn.remove_dead_node(m);
+ // fall through
+ }
+ else if (m != iff && split_up(m, region, iff)) {
+ // fall through
+ } else {
+ continue;
+ }
+ // Something unpredictable changed.
+ // Tell the iterators to refresh themselves, and rerun the loop.
+ i = region->refresh_out_pos(i);
+ j = region->refresh_out_pos(j);
+ progress = true;
+ }
+ }
+ }
+
+ // Now we have no instructions in the block containing the IF.
+ // Split the IF.
+ Node *new_iff = split_thru_region( iff, region );
+
+ // Replace both uses of 'new_iff' with Regions merging True/False
+ // paths. This makes 'new_iff' go dead.
+ Node *old_false, *old_true;
+ Node *new_false, *new_true;
+ for (DUIterator_Last j2min, j2 = iff->last_outs(j2min); j2 >= j2min; --j2) {
+ Node *ifp = iff->last_out(j2);
+ assert( ifp->Opcode() == Op_IfFalse || ifp->Opcode() == Op_IfTrue, "" );
+ ifp->set_req(0, new_iff);
+ Node *ifpx = split_thru_region( ifp, region );
+
+ // Replace 'If' projection of a Region with a Region of
+ // 'If' projections.
+ ifpx->set_req(0, ifpx); // A TRUE RegionNode
+
+ // Setup dominator info
+ set_idom(ifpx, region_dom, dom_depth(region_dom) + 1);
+
+ // Check for splitting loop tails
+ if( get_loop(iff)->tail() == ifp )
+ get_loop(iff)->_tail = ifpx;
+
+ // Replace in the graph with lazy-update mechanism
+ new_iff->set_req(0, new_iff); // hook self so it does not go dead
+ lazy_replace_proj( ifp, ifpx );
+ new_iff->set_req(0, region);
+
+ // Record bits for later xforms
+ if( ifp->Opcode() == Op_IfFalse ) {
+ old_false = ifp;
+ new_false = ifpx;
+ } else {
+ old_true = ifp;
+ new_true = ifpx;
+ }
+ }
+ _igvn.remove_dead_node(new_iff);
+ // Lazy replace IDOM info with the region's dominator
+ lazy_replace( iff, region_dom );
+
+ // Now make the original merge point go dead, by handling all its uses.
+ small_cache region_cache;
+ // Preload some control flow in region-cache
+ region_cache.lru_insert( new_false, new_false );
+ region_cache.lru_insert( new_true , new_true );
+ // Now handle all uses of the splitting block
+ for (DUIterator_Last kmin, k = region->last_outs(kmin); k >= kmin; --k) {
+ Node* phi = region->last_out(k);
+ if( !phi->in(0) ) { // Dead phi? Remove it
+ _igvn.remove_dead_node(phi);
+ continue;
+ }
+ assert( phi->in(0) == region, "" );
+ if( phi == region ) { // Found the self-reference
+ phi->set_req(0, NULL);
+ continue; // Break the self-cycle
+ }
+ // Expected common case: Phi hanging off of Region
+ if( phi->is_Phi() ) {
+ // Need a per-def cache. Phi represents a def, so make a cache
+ small_cache phi_cache;
+
+ // Inspect all Phi uses to make the Phi go dead
+ for (DUIterator_Last lmin, l = phi->last_outs(lmin); l >= lmin; --l) {
+ Node* use = phi->last_out(l);
+ // Compute the new DEF for this USE. New DEF depends on the path
+ // taken from the original DEF to the USE. The new DEF may be some
+ // collection of PHI's merging values from different paths. The Phis
+ // inserted depend only on the location of the USE. We use a
+ // 2-element cache to handle multiple uses from the same block.
+ handle_use( use, phi, &phi_cache, region_dom, new_false, new_true, old_false, old_true );
+ } // End of while phi has uses
+
+ // Because handle_use might relocate region->_out,
+ // we must refresh the iterator.
+ k = region->last_outs(kmin);
+
+ // Remove the dead Phi
+ _igvn.remove_dead_node( phi );
+
+ } else {
+ // Random memory op guarded by Region. Compute new DEF for USE.
+ handle_use( phi, region, &region_cache, region_dom, new_false, new_true, old_false, old_true );
+ }
+
+ } // End of while merge point has phis
+
+ // Any leftover bits in the splitting block must not have depended on local
+ // Phi inputs (these have already been split-up). Hence it's safe to hoist
+ // these guys to the dominating point.
+ lazy_replace( region, region_dom );
+#ifndef PRODUCT
+ if( VerifyLoopOptimizations ) verify();
+#endif
+}
diff --git a/src/share/vm/opto/subnode.cpp b/src/share/vm/opto/subnode.cpp
new file mode 100644
index 000000000..1344197ca
--- /dev/null
+++ b/src/share/vm/opto/subnode.cpp
@@ -0,0 +1,1206 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_subnode.cpp.incl"
+#include "math.h"
+
+//=============================================================================
+//------------------------------Identity---------------------------------------
+// If right input is a constant 0, return the left input.
+Node *SubNode::Identity( PhaseTransform *phase ) {
+ assert(in(1) != this, "Must already have called Value");
+ assert(in(2) != this, "Must already have called Value");
+
+ // Remove double negation
+ const Type *zero = add_id();
+ if( phase->type( in(1) )->higher_equal( zero ) &&
+ in(2)->Opcode() == Opcode() &&
+ phase->type( in(2)->in(1) )->higher_equal( zero ) ) {
+ return in(2)->in(2);
+ }
+
+ // Convert "(X+Y) - Y" into X
+ if( in(1)->Opcode() == Op_AddI ) {
+ if( phase->eqv(in(1)->in(2),in(2)) )
+ return in(1)->in(1);
+ // Also catch: "(X + Opaque2(Y)) - Y". In this case, 'Y' is a loop-varying
+ // trip counter and X is likely to be loop-invariant (that's how O2 Nodes
+ // are originally used, although the optimizer sometimes jiggers things).
+ // This folding through an O2 removes a loop-exit use of a loop-varying
+ // value and generally lowers register pressure in and around the loop.
+ if( in(1)->in(2)->Opcode() == Op_Opaque2 &&
+ phase->eqv(in(1)->in(2)->in(1),in(2)) )
+ return in(1)->in(1);
+ }
+
+ return ( phase->type( in(2) )->higher_equal( zero ) ) ? in(1) : this;
+}
+
+//------------------------------Value------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+  // Not correct for SubFNode and AddFNode (must check for infinity)
+ // Equal? Subtract is zero
+ if (phase->eqv_uncast(in1, in2)) return add_id();
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ if( t1 == Type::BOTTOM || t2 == Type::BOTTOM )
+ return bottom_type();
+
+ return sub(t1,t2); // Local flavor of type subtraction
+
+}
+
+//=============================================================================
+
+//------------------------------Helper function--------------------------------
+static bool ok_to_convert(Node* inc, Node* iv) {
+ // Do not collapse (x+c0)-y if "+" is a loop increment, because the
+ // "-" is loop invariant and collapsing extends the live-range of "x"
+ // to overlap with the "+", forcing another register to be used in
+ // the loop.
+ // This test will be clearer with '&&' (apply DeMorgan's rule)
+ // but I like the early cutouts that happen here.
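+  //
+  // Illustrative case: when 'inc' is the increment of a counted loop
+  // (inc = phi + 1), turning (phi+1) - y into (phi-y) + 1 would stretch the
+  // live range of 'phi' across the new subtract and cost an extra register
+  // in the loop.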
+ const PhiNode *phi;
+ if( ( !inc->in(1)->is_Phi() ||
+ !(phi=inc->in(1)->as_Phi()) ||
+ phi->is_copy() ||
+ !phi->region()->is_CountedLoop() ||
+ inc != phi->region()->as_CountedLoop()->incr() )
+ &&
+ // Do not collapse (x+c0)-iv if "iv" is a loop induction variable,
+ // because "x" maybe invariant.
+ ( !iv->is_loop_iv() )
+ ) {
+ return true;
+ } else {
+ return false;
+ }
+}
+//------------------------------Ideal------------------------------------------
+Node *SubINode::Ideal(PhaseGVN *phase, bool can_reshape){
+ Node *in1 = in(1);
+ Node *in2 = in(2);
+ uint op1 = in1->Opcode();
+ uint op2 = in2->Opcode();
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
+ ( op1 == Op_AddI || op1 == Op_SubI ) &&
+ ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
+ phase->eqv( in1->in(1), in1 ) || phase->eqv( in1->in(2), in1 ) ) )
+ assert(false, "dead loop in SubINode::Ideal");
+#endif
+
+ const Type *t2 = phase->type( in2 );
+ if( t2 == Type::TOP ) return NULL;
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::Int ){ // Might be bottom or top...
+ const TypeInt *i = t2->is_int();
+ if( i->is_con() )
+ return new (phase->C, 3) AddINode(in1, phase->intcon(-i->get_con()));
+ }
+
+ // Convert "(x+c0) - y" into (x-y) + c0"
+ // Do not collapse (x+c0)-y if "+" is a loop increment or
+ // if "y" is a loop induction variable.
+ if( op1 == Op_AddI && ok_to_convert(in1, in2) ) {
+ const Type *tadd = phase->type( in1->in(2) );
+ if( tadd->singleton() && tadd != Type::TOP ) {
+ Node *sub2 = phase->transform( new (phase->C, 3) SubINode( in1->in(1), in2 ));
+ return new (phase->C, 3) AddINode( sub2, in1->in(2) );
+ }
+ }
+
+
+ // Convert "x - (y+c0)" into "(x-y) - c0"
+ // Need the same check as in above optimization but reversed.
+ if (op2 == Op_AddI && ok_to_convert(in2, in1)) {
+ Node* in21 = in2->in(1);
+ Node* in22 = in2->in(2);
+ const TypeInt* tcon = phase->type(in22)->isa_int();
+ if (tcon != NULL && tcon->is_con()) {
+ Node* sub2 = phase->transform( new (phase->C, 3) SubINode(in1, in21) );
+ Node* neg_c0 = phase->intcon(- tcon->get_con());
+ return new (phase->C, 3) AddINode(sub2, neg_c0);
+ }
+ }
+
+ const Type *t1 = phase->type( in1 );
+ if( t1 == Type::TOP ) return NULL;
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( ( op2 == Op_AddI || op2 == Op_SubI ) &&
+ ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
+ phase->eqv( in2->in(1), in2 ) || phase->eqv( in2->in(2), in2 ) ) )
+ assert(false, "dead loop in SubINode::Ideal");
+#endif
+
+ // Convert "x - (x+y)" into "-y"
+ if( op2 == Op_AddI &&
+ phase->eqv( in1, in2->in(1) ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in2->in(2));
+ // Convert "(x-y) - x" into "-y"
+ if( op1 == Op_SubI &&
+ phase->eqv( in1->in(1), in2 ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in1->in(2));
+ // Convert "x - (y+x)" into "-y"
+ if( op2 == Op_AddI &&
+ phase->eqv( in1, in2->in(2) ) )
+ return new (phase->C, 3) SubINode( phase->intcon(0),in2->in(1));
+
+ // Convert "0 - (x-y)" into "y-x"
+ if( t1 == TypeInt::ZERO && op2 == Op_SubI )
+ return new (phase->C, 3) SubINode( in2->in(2), in2->in(1) );
+
+ // Convert "0 - (x+con)" into "-con-x"
+ jint con;
+ if( t1 == TypeInt::ZERO && op2 == Op_AddI &&
+ (con = in2->in(2)->find_int_con(0)) != 0 )
+ return new (phase->C, 3) SubINode( phase->intcon(-con), in2->in(1) );
+
+ // Convert "(X+A) - (X+B)" into "A - B"
+ if( op1 == Op_AddI && op2 == Op_AddI && in1->in(1) == in2->in(1) )
+ return new (phase->C, 3) SubINode( in1->in(2), in2->in(2) );
+
+ // Convert "(A+X) - (B+X)" into "A - B"
+ if( op1 == Op_AddI && op2 == Op_AddI && in1->in(2) == in2->in(2) )
+ return new (phase->C, 3) SubINode( in1->in(1), in2->in(1) );
+
+ // Convert "A-(B-C)" into (A+C)-B", since add is commutative and generally
+ // nicer to optimize than subtract.
+ if( op2 == Op_SubI && in2->outcnt() == 1) {
+ Node *add1 = phase->transform( new (phase->C, 3) AddINode( in1, in2->in(2) ) );
+ return new (phase->C, 3) SubINode( add1, in2->in(1) );
+ }
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubINode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+ int32 lo = r0->_lo - r1->_hi;
+ int32 hi = r0->_hi - r1->_lo;
+
+ // We next check for 32-bit overflow.
+ // If that happens, we just assume all integers are possible.
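+  // (e.g. r0 = [min_jint..0], r1 = [0..max_jint]: lo = min_jint - max_jint
+  // wraps around to +1, the signs of r0->_lo and lo differ, and we fall back
+  // to TypeInt::INT.)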
+ if( (((r0->_lo ^ r1->_hi) >= 0) || // lo ends have same signs OR
+ ((r0->_lo ^ lo) >= 0)) && // lo results have same signs AND
+ (((r0->_hi ^ r1->_lo) >= 0) || // hi ends have same signs OR
+ ((r0->_hi ^ hi) >= 0)) ) // hi results have same signs
+ return TypeInt::make(lo,hi,MAX2(r0->_widen,r1->_widen));
+ else // Overflow; assume all integers
+ return TypeInt::INT;
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubLNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ Node *in1 = in(1);
+ Node *in2 = in(2);
+ uint op1 = in1->Opcode();
+ uint op2 = in2->Opcode();
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( phase->eqv( in1, this ) || phase->eqv( in2, this ) ||
+ ( op1 == Op_AddL || op1 == Op_SubL ) &&
+ ( phase->eqv( in1->in(1), this ) || phase->eqv( in1->in(2), this ) ||
+ phase->eqv( in1->in(1), in1 ) || phase->eqv( in1->in(2), in1 ) ) )
+ assert(false, "dead loop in SubLNode::Ideal");
+#endif
+
+ if( phase->type( in2 ) == Type::TOP ) return NULL;
+ const TypeLong *i = phase->type( in2 )->isa_long();
+ // Convert "x-c0" into "x+ -c0".
+ if( i && // Might be bottom or top...
+ i->is_con() )
+ return new (phase->C, 3) AddLNode(in1, phase->longcon(-i->get_con()));
+
+ // Convert "(x+c0) - y" into (x-y) + c0"
+ // Do not collapse (x+c0)-y if "+" is a loop increment or
+ // if "y" is a loop induction variable.
+ if( op1 == Op_AddL && ok_to_convert(in1, in2) ) {
+ Node *in11 = in1->in(1);
+ const Type *tadd = phase->type( in1->in(2) );
+ if( tadd->singleton() && tadd != Type::TOP ) {
+ Node *sub2 = phase->transform( new (phase->C, 3) SubLNode( in11, in2 ));
+ return new (phase->C, 3) AddLNode( sub2, in1->in(2) );
+ }
+ }
+
+ // Convert "x - (y+c0)" into "(x-y) - c0"
+ // Need the same check as in above optimization but reversed.
+ if (op2 == Op_AddL && ok_to_convert(in2, in1)) {
+ Node* in21 = in2->in(1);
+ Node* in22 = in2->in(2);
+ const TypeLong* tcon = phase->type(in22)->isa_long();
+ if (tcon != NULL && tcon->is_con()) {
+ Node* sub2 = phase->transform( new (phase->C, 3) SubLNode(in1, in21) );
+ Node* neg_c0 = phase->longcon(- tcon->get_con());
+ return new (phase->C, 3) AddLNode(sub2, neg_c0);
+ }
+ }
+
+ const Type *t1 = phase->type( in1 );
+ if( t1 == Type::TOP ) return NULL;
+
+#ifdef ASSERT
+ // Check for dead loop
+ if( ( op2 == Op_AddL || op2 == Op_SubL ) &&
+ ( phase->eqv( in2->in(1), this ) || phase->eqv( in2->in(2), this ) ||
+ phase->eqv( in2->in(1), in2 ) || phase->eqv( in2->in(2), in2 ) ) )
+ assert(false, "dead loop in SubLNode::Ideal");
+#endif
+
+ // Convert "x - (x+y)" into "-y"
+ if( op2 == Op_AddL &&
+ phase->eqv( in1, in2->in(1) ) )
+ return new (phase->C, 3) SubLNode( phase->makecon(TypeLong::ZERO), in2->in(2));
+ // Convert "x - (y+x)" into "-y"
+ if( op2 == Op_AddL &&
+ phase->eqv( in1, in2->in(2) ) )
+ return new (phase->C, 3) SubLNode( phase->makecon(TypeLong::ZERO),in2->in(1));
+
+ // Convert "0 - (x-y)" into "y-x"
+ if( phase->type( in1 ) == TypeLong::ZERO && op2 == Op_SubL )
+ return new (phase->C, 3) SubLNode( in2->in(2), in2->in(1) );
+
+ // Convert "(X+A) - (X+B)" into "A - B"
+ if( op1 == Op_AddL && op2 == Op_AddL && in1->in(1) == in2->in(1) )
+ return new (phase->C, 3) SubLNode( in1->in(2), in2->in(2) );
+
+ // Convert "(A+X) - (B+X)" into "A - B"
+ if( op1 == Op_AddL && op2 == Op_AddL && in1->in(2) == in2->in(2) )
+ return new (phase->C, 3) SubLNode( in1->in(1), in2->in(1) );
+
+ // Convert "A-(B-C)" into (A+C)-B"
+ if( op2 == Op_SubL && in2->outcnt() == 1) {
+ Node *add1 = phase->transform( new (phase->C, 3) AddLNode( in1, in2->in(2) ) );
+ return new (phase->C, 3) SubLNode( add1, in2->in(1) );
+ }
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubLNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeLong *r0 = t1->is_long(); // Handy access
+ const TypeLong *r1 = t2->is_long();
+ jlong lo = r0->_lo - r1->_hi;
+ jlong hi = r0->_hi - r1->_lo;
+
+  // We next check for 64-bit overflow.
+  // If that happens, we just assume all longs are possible.
+ if( (((r0->_lo ^ r1->_hi) >= 0) || // lo ends have same signs OR
+ ((r0->_lo ^ lo) >= 0)) && // lo results have same signs AND
+ (((r0->_hi ^ r1->_lo) >= 0) || // hi ends have same signs OR
+ ((r0->_hi ^ hi) >= 0)) ) // hi results have same signs
+ return TypeLong::make(lo,hi,MAX2(r0->_widen,r1->_widen));
+ else // Overflow; assume all integers
+ return TypeLong::LONG;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubFPNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // if both operands are infinity of same sign, the result is NaN; do
+ // not replace with zero
+ if( (t1->is_finite() && t2->is_finite()) ) {
+ if( phase->eqv(in1, in2) ) return add_id();
+ }
+
+ // Either input is BOTTOM ==> the result is the local BOTTOM
+ const Type *bot = bottom_type();
+ if( (t1 == bot) || (t2 == bot) ||
+ (t1 == Type::BOTTOM) || (t2 == Type::BOTTOM) )
+ return bot;
+
+ return sub(t1,t2); // Local flavor of type subtraction
+}
+
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ const Type *t2 = phase->type( in(2) );
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::FloatCon ) { // Might be bottom or top...
+ // return new (phase->C, 3) AddFNode(in(1), phase->makecon( TypeF::make(-t2->getf()) ) );
+ }
+
+ // Not associative because of boundary conditions (infinity)
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ // Convert "x - (x+y)" into "-y"
+ if( in(2)->is_Add() &&
+ phase->eqv(in(1),in(2)->in(1) ) )
+ return new (phase->C, 3) SubFNode( phase->makecon(TypeF::ZERO),in(2)->in(2));
+ }
+
+ // Cannot replace 0.0-X with -X because a 'fsub' bytecode computes
+ // 0.0-0.0 as +0.0, while a 'fneg' bytecode computes -0.0.
+ //if( phase->type(in(1)) == TypeF::ZERO )
+ //return new (phase->C, 2) NegFNode(in(2));
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubFNode::sub( const Type *t1, const Type *t2 ) const {
+  // No constant folding if either operand is infinity or NaN.
+ if( g_isfinite(t1->getf()) && g_isfinite(t2->getf()) ) {
+ return TypeF::make( t1->getf() - t2->getf() );
+ }
+ else if( g_isnan(t1->getf()) ) {
+ return t1;
+ }
+ else if( g_isnan(t2->getf()) ) {
+ return t2;
+ }
+ else {
+ return Type::FLOAT;
+ }
+}
+
+//=============================================================================
+//------------------------------Ideal------------------------------------------
+Node *SubDNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ const Type *t2 = phase->type( in(2) );
+ // Convert "x-c0" into "x+ -c0".
+ if( t2->base() == Type::DoubleCon ) { // Might be bottom or top...
+ // return new (phase->C, 3) AddDNode(in(1), phase->makecon( TypeD::make(-t2->getd()) ) );
+ }
+
+ // Not associative because of boundary conditions (infinity)
+ if( IdealizedNumerics && !phase->C->method()->is_strict() ) {
+ // Convert "x - (x+y)" into "-y"
+ if( in(2)->is_Add() &&
+ phase->eqv(in(1),in(2)->in(1) ) )
+ return new (phase->C, 3) SubDNode( phase->makecon(TypeD::ZERO),in(2)->in(2));
+ }
+
+ // Cannot replace 0.0-X with -X because a 'dsub' bytecode computes
+ // 0.0-0.0 as +0.0, while a 'dneg' bytecode computes -0.0.
+ //if( phase->type(in(1)) == TypeD::ZERO )
+ //return new (phase->C, 2) NegDNode(in(2));
+
+ return NULL;
+}
+
+//------------------------------sub--------------------------------------------
+// A subtract node differences its two inputs.
+const Type *SubDNode::sub( const Type *t1, const Type *t2 ) const {
+  // No constant folding if either operand is infinity or NaN.
+ if( g_isfinite(t1->getd()) && g_isfinite(t2->getd()) ) {
+ return TypeD::make( t1->getd() - t2->getd() );
+ }
+ else if( g_isnan(t1->getd()) ) {
+ return t1;
+ }
+ else if( g_isnan(t2->getd()) ) {
+ return t2;
+ }
+ else {
+ return Type::DOUBLE;
+ }
+}
+
+//=============================================================================
+//------------------------------Idealize---------------------------------------
+// Unlike SubNodes, a compare must still flatten its return value to the
+// range -1, 0, 1.
+// And optimizations like those for (X + Y) - X fail if overflow happens.
+Node *CmpNode::Identity( PhaseTransform *phase ) {
+ return this;
+}
+
+//=============================================================================
+//------------------------------cmp--------------------------------------------
+// Simplify a CmpI (compare 2 integers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpINode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+
+ if( r0->_hi < r1->_lo ) // Range is always low?
+ return TypeInt::CC_LT;
+ else if( r0->_lo > r1->_hi ) // Range is always high?
+ return TypeInt::CC_GT;
+
+ else if( r0->is_con() && r1->is_con() ) { // comparing constants?
+ assert(r0->get_con() == r1->get_con(), "must be equal");
+ return TypeInt::CC_EQ; // Equal results.
+ } else if( r0->_hi == r1->_lo ) // Range is never high?
+ return TypeInt::CC_LE;
+ else if( r0->_lo == r1->_hi ) // Range is never low?
+ return TypeInt::CC_GE;
+ return TypeInt::CC; // else use worst case results
+}
+
+// Simplify a CmpU (compare 2 unsigned integers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpUNode::sub( const Type *t1, const Type *t2 ) const {
+ assert(!t1->isa_ptr(), "obsolete usage of CmpU");
+
+ // comparing two unsigned ints
+ const TypeInt *r0 = t1->is_int(); // Handy access
+ const TypeInt *r1 = t2->is_int();
+
+ // Current installed version
+ // Compare ranges for non-overlap
+ juint lo0 = r0->_lo;
+ juint hi0 = r0->_hi;
+ juint lo1 = r1->_lo;
+ juint hi1 = r1->_hi;
+
+ // If either one has both negative and positive values,
+ // it therefore contains both 0 and -1, and since [0..-1] is the
+ // full unsigned range, the type must act as an unsigned bottom.
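+  // (e.g. a signed range [-1..1] contains 0, the unsigned minimum, as well as
+  // -1, the unsigned maximum, so viewed unsigned it constrains nothing.)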
+ bool bot0 = ((jint)(lo0 ^ hi0) < 0);
+ bool bot1 = ((jint)(lo1 ^ hi1) < 0);
+
+ if (bot0 || bot1) {
+ // All unsigned values are LE -1 and GE 0.
+ if (lo0 == 0 && hi0 == 0) {
+ return TypeInt::CC_LE; // 0 <= bot
+ } else if (lo1 == 0 && hi1 == 0) {
+ return TypeInt::CC_GE; // bot >= 0
+ }
+ } else {
+ // We can use ranges of the form [lo..hi] if signs are the same.
+ assert(lo0 <= hi0 && lo1 <= hi1, "unsigned ranges are valid");
+ // results are reversed, '-' > '+' for unsigned compare
+ if (hi0 < lo1) {
+ return TypeInt::CC_LT; // smaller
+ } else if (lo0 > hi1) {
+ return TypeInt::CC_GT; // greater
+ } else if (hi0 == lo1 && lo0 == hi1) {
+ return TypeInt::CC_EQ; // Equal results
+ } else if (lo0 >= hi1) {
+ return TypeInt::CC_GE;
+ } else if (hi0 <= lo1) {
+ // Check for special case in Hashtable::get. (See below.)
+ if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
+ in(1)->Opcode() == Op_ModI &&
+ in(1)->in(2) == in(2) )
+ return TypeInt::CC_LT;
+ return TypeInt::CC_LE;
+ }
+ }
+ // Check for special case in Hashtable::get - the hash index is
+ // mod'ed to the table size so the following range check is useless.
+ // Check for: (X Mod Y) CmpU Y, where the mod result and Y both have
+ // to be positive.
+ // (This is a gross hack, since the sub method never
+ // looks at the structure of the node in any other case.)
+ if ((jint)lo0 >= 0 && (jint)lo1 >= 0 &&
+ in(1)->Opcode() == Op_ModI &&
+ in(1)->in(2)->uncast() == in(2)->uncast())
+ return TypeInt::CC_LT;
+ return TypeInt::CC; // else use worst case results
+}
+
+//------------------------------Idealize---------------------------------------
+Node *CmpINode::Ideal( PhaseGVN *phase, bool can_reshape ) {
+ if (phase->type(in(2))->higher_equal(TypeInt::ZERO)) {
+ switch (in(1)->Opcode()) {
+ case Op_CmpL3: // Collapse a CmpL3/CmpI into a CmpL
+ return new (phase->C, 3) CmpLNode(in(1)->in(1),in(1)->in(2));
+ case Op_CmpF3: // Collapse a CmpF3/CmpI into a CmpF
+ return new (phase->C, 3) CmpFNode(in(1)->in(1),in(1)->in(2));
+ case Op_CmpD3: // Collapse a CmpD3/CmpI into a CmpD
+ return new (phase->C, 3) CmpDNode(in(1)->in(1),in(1)->in(2));
+ //case Op_SubI:
+ // If (x - y) cannot overflow, then ((x - y) <?> 0)
+ // can be turned into (x <?> y).
+ // This is handled (with more general cases) by Ideal_sub_algebra.
+ }
+ }
+ return NULL; // No change
+}
+
+
+//=============================================================================
+// Simplify a CmpL (compare 2 longs) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpLNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypeLong *r0 = t1->is_long(); // Handy access
+ const TypeLong *r1 = t2->is_long();
+
+ if( r0->_hi < r1->_lo ) // Range is always low?
+ return TypeInt::CC_LT;
+ else if( r0->_lo > r1->_hi ) // Range is always high?
+ return TypeInt::CC_GT;
+
+ else if( r0->is_con() && r1->is_con() ) { // comparing constants?
+ assert(r0->get_con() == r1->get_con(), "must be equal");
+ return TypeInt::CC_EQ; // Equal results.
+ } else if( r0->_hi == r1->_lo ) // Range is never high?
+ return TypeInt::CC_LE;
+ else if( r0->_lo == r1->_hi ) // Range is never low?
+ return TypeInt::CC_GE;
+ return TypeInt::CC; // else use worst case results
+}
+
+//=============================================================================
+//------------------------------sub--------------------------------------------
+// Simplify a CmpP (compare 2 pointers) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpPNode::sub( const Type *t1, const Type *t2 ) const {
+ const TypePtr *r0 = t1->is_ptr(); // Handy access
+ const TypePtr *r1 = t2->is_ptr();
+
+  // Undefined inputs make for an undefined result
+ if( TypePtr::above_centerline(r0->_ptr) ||
+ TypePtr::above_centerline(r1->_ptr) )
+ return Type::TOP;
+
+ if (r0 == r1 && r0->singleton()) {
+ // Equal pointer constants (klasses, nulls, etc.)
+ return TypeInt::CC_EQ;
+ }
+
+ // See if it is 2 unrelated classes.
+ const TypeOopPtr* p0 = r0->isa_oopptr();
+ const TypeOopPtr* p1 = r1->isa_oopptr();
+ if (p0 && p1) {
+ ciKlass* klass0 = p0->klass();
+ bool xklass0 = p0->klass_is_exact();
+ ciKlass* klass1 = p1->klass();
+ bool xklass1 = p1->klass_is_exact();
+ int kps = (p0->isa_klassptr()?1:0) + (p1->isa_klassptr()?1:0);
+ if (klass0 && klass1 &&
+ kps != 1 && // both or neither are klass pointers
+ !klass0->is_interface() && // do not trust interfaces
+ !klass1->is_interface()) {
+ // See if neither subclasses the other, or if the class on top
+ // is precise. In either of these cases, the compare must fail.
+ if (klass0->equals(klass1) || // if types are unequal but klasses are
+ !klass0->is_java_klass() || // types not part of Java language?
+ !klass1->is_java_klass()) { // types not part of Java language?
+ // Do nothing; we know nothing for imprecise types
+ } else if (klass0->is_subtype_of(klass1)) {
+ // If klass1's type is PRECISE, then we can fail.
+ if (xklass1) return TypeInt::CC_GT;
+ } else if (klass1->is_subtype_of(klass0)) {
+ // If klass0's type is PRECISE, then we can fail.
+ if (xklass0) return TypeInt::CC_GT;
+ } else { // Neither subtypes the other
+ return TypeInt::CC_GT; // ...so always fail
+ }
+ }
+ }
+
+ // Known constants can be compared exactly
+ // Null can be distinguished from any NotNull pointers
+  // Unknown inputs make for an unknown result
+ if( r0->singleton() ) {
+ intptr_t bits0 = r0->get_con();
+ if( r1->singleton() )
+ return bits0 == r1->get_con() ? TypeInt::CC_EQ : TypeInt::CC_GT;
+ return ( r1->_ptr == TypePtr::NotNull && bits0==0 ) ? TypeInt::CC_GT : TypeInt::CC;
+ } else if( r1->singleton() ) {
+ intptr_t bits1 = r1->get_con();
+ return ( r0->_ptr == TypePtr::NotNull && bits1==0 ) ? TypeInt::CC_GT : TypeInt::CC;
+ } else
+ return TypeInt::CC;
+}
+
+//------------------------------Ideal------------------------------------------
+// Check for the case of comparing an unknown klass loaded from the primary
+// super-type array vs a known klass with no subtypes. This amounts to
+// checking to see if an unknown klass subtypes a known klass with no subtypes;
+// this only happens on an exact match. We can shorten this test by 1 load.
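+// For example, a subtype check against a leaf class C looks roughly like
+//   CmpP (LoadKlass (AddP obj_klass C.super_check_offset)) (ConP C)
+// and, because C has no subklasses, only obj_klass == C can have C stored in
+// that primary-super slot, so we bypass the load and compare obj_klass to C.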
+Node *CmpPNode::Ideal( PhaseGVN *phase, bool can_reshape ) {
+ // Constant pointer on right?
+ const TypeKlassPtr* t2 = phase->type(in(2))->isa_klassptr();
+ if (t2 == NULL || !t2->klass_is_exact())
+ return NULL;
+ // Get the constant klass we are comparing to.
+ ciKlass* superklass = t2->klass();
+
+ // Now check for LoadKlass on left.
+ Node* ldk1 = in(1);
+ if (ldk1->Opcode() != Op_LoadKlass)
+ return NULL;
+ // Take apart the address of the LoadKlass:
+ Node* adr1 = ldk1->in(MemNode::Address);
+ intptr_t con2 = 0;
+ Node* ldk2 = AddPNode::Ideal_base_and_offset(adr1, phase, con2);
+ if (ldk2 == NULL)
+ return NULL;
+ if (con2 == oopDesc::klass_offset_in_bytes()) {
+ // We are inspecting an object's concrete class.
+ // Short-circuit the check if the query is abstract.
+ if (superklass->is_interface() ||
+ superklass->is_abstract()) {
+ // Make it come out always false:
+ this->set_req(2, phase->makecon(TypePtr::NULL_PTR));
+ return this;
+ }
+ }
+
+ // Check for a LoadKlass from primary supertype array.
+ // Any nested loadklass from loadklass+con must be from the p.s. array.
+ if (ldk2->Opcode() != Op_LoadKlass)
+ return NULL;
+
+ // Verify that we understand the situation
+ if (con2 != (intptr_t) superklass->super_check_offset())
+ return NULL; // Might be element-klass loading from array klass
+
+ // If 'superklass' has no subklasses and is not an interface, then we are
+ // assured that the only input which will pass the type check is
+ // 'superklass' itself.
+ //
+ // We could be more liberal here, and allow the optimization on interfaces
+ // which have a single implementor. This would require us to increase the
+ // expressiveness of the add_dependency() mechanism.
+ // %%% Do this after we fix TypeOopPtr: Deps are expressive enough now.
+
+ // Object arrays must have their base element have no subtypes
+ while (superklass->is_obj_array_klass()) {
+ ciType* elem = superklass->as_obj_array_klass()->element_type();
+ superklass = elem->as_klass();
+ }
+ if (superklass->is_instance_klass()) {
+ ciInstanceKlass* ik = superklass->as_instance_klass();
+ if (ik->has_subklass() || ik->is_interface()) return NULL;
+ // Add a dependency if there is a chance that a subclass will be added later.
+ if (!ik->is_final()) {
+ phase->C->dependencies()->assert_leaf_type(ik);
+ }
+ }
+
+ // Bypass the dependent load, and compare directly
+ this->set_req(1,ldk2);
+
+ return this;
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Simplify a CmpF (compare 2 floats) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpFNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Not constants? Don't know squat - even if they are the same
+ // value! If they are NaN's they compare to LT instead of EQ.
+ const TypeF *tf1 = t1->isa_float_constant();
+ const TypeF *tf2 = t2->isa_float_constant();
+ if( !tf1 || !tf2 ) return TypeInt::CC;
+
+ // This implements the Java bytecode fcmpl, so unordered returns -1.
+ if( tf1->is_nan() || tf2->is_nan() )
+ return TypeInt::CC_LT;
+
+ if( tf1->_f < tf2->_f ) return TypeInt::CC_LT;
+ if( tf1->_f > tf2->_f ) return TypeInt::CC_GT;
+ assert( tf1->_f == tf2->_f, "do not understand FP behavior" );
+ return TypeInt::CC_EQ;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Simplify a CmpD (compare 2 doubles) node, based on local information.
+// If both inputs are constants, compare them.
+const Type *CmpDNode::Value( PhaseTransform *phase ) const {
+ const Node* in1 = in(1);
+ const Node* in2 = in(2);
+ // Either input is TOP ==> the result is TOP
+ const Type* t1 = (in1 == this) ? Type::TOP : phase->type(in1);
+ if( t1 == Type::TOP ) return Type::TOP;
+ const Type* t2 = (in2 == this) ? Type::TOP : phase->type(in2);
+ if( t2 == Type::TOP ) return Type::TOP;
+
+ // Not constants? Don't know squat - even if they are the same
+ // value! If they are NaN's they compare to LT instead of EQ.
+ const TypeD *td1 = t1->isa_double_constant();
+ const TypeD *td2 = t2->isa_double_constant();
+ if( !td1 || !td2 ) return TypeInt::CC;
+
+ // This implements the Java bytecode dcmpl, so unordered returns -1.
+ if( td1->is_nan() || td2->is_nan() )
+ return TypeInt::CC_LT;
+
+ if( td1->_d < td2->_d ) return TypeInt::CC_LT;
+ if( td1->_d > td2->_d ) return TypeInt::CC_GT;
+ assert( td1->_d == td2->_d, "do not understand FP behavior" );
+ return TypeInt::CC_EQ;
+}
+
+//------------------------------Ideal------------------------------------------
+Node *CmpDNode::Ideal(PhaseGVN *phase, bool can_reshape){
+ // Check if we can change this to a CmpF and remove a ConvD2F operation.
+ // Change (CMPD (F2D (float)) (ConD value))
+ // To (CMPF (float) (ConF value))
+ // Valid when 'value' does not lose precision as a float.
+ // Benefits: eliminates conversion, does not require 24-bit mode
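+ // e.g. "f == 1.5" qualifies since 1.5 is exact as a float, while "f == 0.1"
+ // does not, because (double)(float)0.1 != 0.1.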
+
+ // NaNs prevent commuting operands. This transform works regardless of the
+ // order of ConD and ConvF2D inputs by preserving the original order.
+ int idx_f2d = 1; // ConvF2D on left side?
+ if( in(idx_f2d)->Opcode() != Op_ConvF2D )
+ idx_f2d = 2; // No, swap to check for reversed args
+ int idx_con = 3-idx_f2d; // Check for the constant on other input
+
+ if( ConvertCmpD2CmpF &&
+ in(idx_f2d)->Opcode() == Op_ConvF2D &&
+ in(idx_con)->Opcode() == Op_ConD ) {
+ const TypeD *t2 = in(idx_con)->bottom_type()->is_double_constant();
+ double t2_value_as_double = t2->_d;
+ float t2_value_as_float = (float)t2_value_as_double;
+ if( t2_value_as_double == (double)t2_value_as_float ) {
+ // Test value can be represented as a float
+ // Eliminate the conversion to double and create new comparison
+ Node *new_in1 = in(idx_f2d)->in(1);
+ Node *new_in2 = phase->makecon( TypeF::make(t2_value_as_float) );
+ if( idx_f2d != 1 ) { // Must flip args to match original order
+ Node *tmp = new_in1;
+ new_in1 = new_in2;
+ new_in2 = tmp;
+ }
+ CmpFNode *new_cmp = (Opcode() == Op_CmpD3)
+ ? new (phase->C, 3) CmpF3Node( new_in1, new_in2 )
+ : new (phase->C, 3) CmpFNode ( new_in1, new_in2 ) ;
+ return new_cmp; // Changed to CmpFNode
+ }
+ // Testing value required the precision of a double
+ }
+ return NULL; // No change
+}
+
+
+//=============================================================================
+//------------------------------cc2logical-------------------------------------
+// Convert a condition code type to a logical type
+const Type *BoolTest::cc2logical( const Type *CC ) const {
+ if( CC == Type::TOP ) return Type::TOP;
+ if( CC->base() != Type::Int ) return TypeInt::BOOL; // Bottom or worse
+ const TypeInt *ti = CC->is_int();
+ if( ti->is_con() ) { // Only 1 kind of condition codes set?
+ // Match low order 2 bits
+ int tmp = ((ti->get_con()&3) == (_test&3)) ? 1 : 0;
+ if( _test & 4 ) tmp = 1-tmp; // Optionally complement result
+ return TypeInt::make(tmp); // Boolean result
+ }
+
+ if( CC == TypeInt::CC_GE ) {
+ if( _test == ge ) return TypeInt::ONE;
+ if( _test == lt ) return TypeInt::ZERO;
+ }
+ if( CC == TypeInt::CC_LE ) {
+ if( _test == le ) return TypeInt::ONE;
+ if( _test == gt ) return TypeInt::ZERO;
+ }
+
+ return TypeInt::BOOL;
+}
+
+//------------------------------dump_spec-------------------------------------
+// Print special per-node info
+#ifndef PRODUCT
+void BoolTest::dump_on(outputStream *st) const {
+ const char *msg[] = {"eq","gt","??","lt","ne","le","??","ge"};
+ st->print(msg[_test]);
+}
+#endif
+
+//=============================================================================
+uint BoolNode::hash() const { return (Node::hash() << 3)|(_test._test+1); }
+uint BoolNode::size_of() const { return sizeof(BoolNode); }
+
+//------------------------------operator==-------------------------------------
+uint BoolNode::cmp( const Node &n ) const {
+ const BoolNode *b = (const BoolNode *)&n; // Cast up
+ return (_test._test == b->_test._test);
+}
+
+//------------------------------clone_cmp--------------------------------------
+// Clone a compare/bool tree
+static Node *clone_cmp( Node *cmp, Node *cmp1, Node *cmp2, PhaseGVN *gvn, BoolTest::mask test ) {
+ Node *ncmp = cmp->clone();
+ ncmp->set_req(1,cmp1);
+ ncmp->set_req(2,cmp2);
+ ncmp = gvn->transform( ncmp );
+ return new (gvn->C, 2) BoolNode( ncmp, test );
+}
+
+//-------------------------------make_predicate--------------------------------
+Node* BoolNode::make_predicate(Node* test_value, PhaseGVN* phase) {
+ if (test_value->is_Con()) return test_value;
+ if (test_value->is_Bool()) return test_value;
+ Compile* C = phase->C;
+ if (test_value->is_CMove() &&
+ test_value->in(CMoveNode::Condition)->is_Bool()) {
+ BoolNode* bol = test_value->in(CMoveNode::Condition)->as_Bool();
+ const Type* ftype = phase->type(test_value->in(CMoveNode::IfFalse));
+ const Type* ttype = phase->type(test_value->in(CMoveNode::IfTrue));
+ if (ftype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ttype)) {
+ return bol;
+ } else if (ttype == TypeInt::ZERO && !TypeInt::ZERO->higher_equal(ftype)) {
+ return phase->transform( bol->negate(phase) );
+ }
+ // Else fall through. The CMove gets in the way of the test.
+ // It should be the case that make_predicate(bol->as_int_value()) == bol.
+ }
+ Node* cmp = new (C, 3) CmpINode(test_value, phase->intcon(0));
+ cmp = phase->transform(cmp);
+ Node* bol = new (C, 2) BoolNode(cmp, BoolTest::ne);
+ return phase->transform(bol);
+}
+
+//--------------------------------as_int_value---------------------------------
+Node* BoolNode::as_int_value(PhaseGVN* phase) {
+ // Inverse to make_predicate. The CMove probably boils down to a Conv2B.
+ Node* cmov = CMoveNode::make(phase->C, NULL, this,
+ phase->intcon(0), phase->intcon(1),
+ TypeInt::BOOL);
+ return phase->transform(cmov);
+}
+
+//----------------------------------negate-------------------------------------
+BoolNode* BoolNode::negate(PhaseGVN* phase) {
+ Compile* C = phase->C;
+ return new (C, 2) BoolNode(in(1), _test.negate());
+}
+
+
+//------------------------------Ideal------------------------------------------
+Node *BoolNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ // Change "bool tst (cmp con x)" into "bool ~tst (cmp x con)".
+ // This moves the constant to the right. Helps value-numbering.
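+ // e.g. "bool lt (cmp 5 x)" becomes "bool gt (cmp x 5)".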
+ Node *cmp = in(1);
+ if( !cmp->is_Sub() ) return NULL;
+ int cop = cmp->Opcode();
+ if( cop == Op_FastLock || cop == Op_FastUnlock ) return NULL;
+ Node *cmp1 = cmp->in(1);
+ Node *cmp2 = cmp->in(2);
+ if( !cmp1 ) return NULL;
+
+ // Constant on left?
+ Node *con = cmp1;
+ uint op2 = cmp2->Opcode();
+ // Move constants to the right of compare's to canonicalize.
+ // Do not muck with Opaque1 nodes, as this indicates a loop
+ // guard that cannot change shape.
+ if( con->is_Con() && !cmp2->is_Con() && op2 != Op_Opaque1 &&
+ // Because of NaN's, CmpD and CmpF are not commutative
+ cop != Op_CmpD && cop != Op_CmpF &&
+ // Protect against swapping inputs to a compare when it is used by a
+ // counted loop exit, which requires maintaining the loop-limit as in(2)
+ !is_counted_loop_exit_test() ) {
+ // Ok, commute the constant to the right of the cmp node.
+ // Clone the Node, getting a new Node of the same class
+ cmp = cmp->clone();
+ // Swap inputs to the clone
+ cmp->swap_edges(1, 2);
+ cmp = phase->transform( cmp );
+ return new (phase->C, 2) BoolNode( cmp, _test.commute() );
+ }
+
+ // Change "bool eq/ne (cmp (xor X 1) 0)" into "bool ne/eq (cmp X 0)".
+ // The XOR-1 is an idiom used to flip the sense of a bool. We flip the
+ // test instead.
+ int cmp1_op = cmp1->Opcode();
+ const TypeInt* cmp2_type = phase->type(cmp2)->isa_int();
+ if (cmp2_type == NULL) return NULL;
+ Node* j_xor = cmp1;
+ if( cmp2_type == TypeInt::ZERO &&
+ cmp1_op == Op_XorI &&
+ j_xor->in(1) != j_xor && // An xor of itself is dead
+ phase->type( j_xor->in(2) ) == TypeInt::ONE &&
+ (_test._test == BoolTest::eq ||
+ _test._test == BoolTest::ne) ) {
+ Node *ncmp = phase->transform(new (phase->C, 3) CmpINode(j_xor->in(1),cmp2));
+ return new (phase->C, 2) BoolNode( ncmp, _test.negate() );
+ }
+
+ // Change "bool eq/ne (cmp (Conv2B X) 0)" into "bool eq/ne (cmp X 0)".
+ // This is a standard idiom for branching on a boolean value.
+ Node *c2b = cmp1;
+ if( cmp2_type == TypeInt::ZERO &&
+ cmp1_op == Op_Conv2B &&
+ (_test._test == BoolTest::eq ||
+ _test._test == BoolTest::ne) ) {
+ Node *ncmp = phase->transform(phase->type(c2b->in(1))->isa_int()
+ ? (Node*)new (phase->C, 3) CmpINode(c2b->in(1),cmp2)
+ : (Node*)new (phase->C, 3) CmpPNode(c2b->in(1),phase->makecon(TypePtr::NULL_PTR))
+ );
+ return new (phase->C, 2) BoolNode( ncmp, _test._test );
+ }
+
+ // Comparing a SubI against a zero is equal to comparing the SubI
+ // arguments directly. This only works for eq and ne comparisons
+ // due to possible integer overflow.
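+ // e.g. "(x - y) == 0" becomes "x == y"; the same rewrite for lt/gt would be
+ // wrong when the subtraction overflows.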
+ if ((_test._test == BoolTest::eq || _test._test == BoolTest::ne) &&
+ (cop == Op_CmpI) &&
+ (cmp1->Opcode() == Op_SubI) &&
+ ( cmp2_type == TypeInt::ZERO ) ) {
+ Node *ncmp = phase->transform( new (phase->C, 3) CmpINode(cmp1->in(1),cmp1->in(2)));
+ return new (phase->C, 2) BoolNode( ncmp, _test._test );
+ }
+
+ // Change (-A vs 0) into (A vs 0) by commuting the test. Disallow in the
+ // most general case because negating 0x80000000 does nothing. Needed for
+ // the CmpF3/SubI/CmpI idiom.
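+ // e.g. "bool lt (cmp (SubI 0 A) 0)" becomes "bool gt (cmp A 0)" when A's
+ // type is known to exclude min_jint (the SYMINT check below).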
+ if( cop == Op_CmpI &&
+ cmp1->Opcode() == Op_SubI &&
+ cmp2_type == TypeInt::ZERO &&
+ phase->type( cmp1->in(1) ) == TypeInt::ZERO &&
+ phase->type( cmp1->in(2) )->higher_equal(TypeInt::SYMINT) ) {
+ Node *ncmp = phase->transform( new (phase->C, 3) CmpINode(cmp1->in(2),cmp2));
+ return new (phase->C, 2) BoolNode( ncmp, _test.commute() );
+ }
+
+ // The transformation below is not valid for either signed or unsigned
+ // comparisons due to wraparound concerns at MAX_VALUE and MIN_VALUE.
+ // This transformation can be resurrected when we are able to
+ // make inferences about the range of values being subtracted from
+ // (or added to) relative to the wraparound point.
+ //
+ // // Remove +/-1's if possible.
+ // // "X <= Y-1" becomes "X < Y"
+ // // "X+1 <= Y" becomes "X < Y"
+ // // "X < Y+1" becomes "X <= Y"
+ // // "X-1 < Y" becomes "X <= Y"
+ // // Do not do this to compares off of the counted-loop-end. These guys are
+ // // checking the trip counter and they want to use the post-incremented
+ // // counter. If they use the PRE-incremented counter, then the counter has
+ // // to be incremented in a private block on a loop backedge.
+ // if( du && du->cnt(this) && du->out(this)[0]->Opcode() == Op_CountedLoopEnd )
+ // return NULL;
+ // #ifndef PRODUCT
+ // // Do not do this in a wash GVN pass during verification.
+ // // Gets triggered by too many simple optimizations to be bothered with
+ // // re-trying it again and again.
+ // if( !phase->allow_progress() ) return NULL;
+ // #endif
+ // // Not valid for unsigned compare because of corner cases involving zero.
+ // // For example, replacing "X-1 <u Y" with "X <=u Y" fails to throw an
+ // // exception in case X is 0 (because 0-1 turns into 4 billion unsigned but
+ // // "0 <=u Y" is always true).
+ // if( cmp->Opcode() == Op_CmpU ) return NULL;
+ // int cmp2_op = cmp2->Opcode();
+ // if( _test._test == BoolTest::le ) {
+ // if( cmp1_op == Op_AddI &&
+ // phase->type( cmp1->in(2) ) == TypeInt::ONE )
+ // return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::lt );
+ // else if( cmp2_op == Op_AddI &&
+ // phase->type( cmp2->in(2) ) == TypeInt::MINUS_1 )
+ // return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::lt );
+ // } else if( _test._test == BoolTest::lt ) {
+ // if( cmp1_op == Op_AddI &&
+ // phase->type( cmp1->in(2) ) == TypeInt::MINUS_1 )
+ // return clone_cmp( cmp, cmp1->in(1), cmp2, phase, BoolTest::le );
+ // else if( cmp2_op == Op_AddI &&
+ // phase->type( cmp2->in(2) ) == TypeInt::ONE )
+ // return clone_cmp( cmp, cmp1, cmp2->in(1), phase, BoolTest::le );
+ // }
+
+ return NULL;
+}
+
+//------------------------------Value------------------------------------------
+// Simplify a Bool (convert condition codes to boolean (1 or 0)) node,
+// based on local information. If the input is constant, do it.
+const Type *BoolNode::Value( PhaseTransform *phase ) const {
+ return _test.cc2logical( phase->type( in(1) ) );
+}
+
+//------------------------------dump_spec--------------------------------------
+// Dump special per-node info
+#ifndef PRODUCT
+void BoolNode::dump_spec(outputStream *st) const {
+ st->print("[");
+ _test.dump_on(st);
+ st->print("]");
+}
+#endif
+
+//------------------------------is_counted_loop_exit_test--------------------------------------
+// Returns true if this Bool is used by a CountedLoopEnd node.
+bool BoolNode::is_counted_loop_exit_test() {
+ for( DUIterator_Fast imax, i = fast_outs(imax); i < imax; i++ ) {
+ Node* use = fast_out(i);
+ if (use->is_CountedLoopEnd()) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//=============================================================================
+//------------------------------NegNode----------------------------------------
+Node *NegFNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_SubF )
+ return new (phase->C, 3) SubFNode( in(1)->in(2), in(1)->in(1) );
+ return NULL;
+}
+
+Node *NegDNode::Ideal(PhaseGVN *phase, bool can_reshape) {
+ if( in(1)->Opcode() == Op_SubD )
+ return new (phase->C, 3) SubDNode( in(1)->in(2), in(1)->in(1) );
+ return NULL;
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute sqrt
+const Type *SqrtDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( sqrt( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute cos
+const Type *CosDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dcos( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute sin
+const Type *SinDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dsin( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute tan
+const Type *TanDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dtan( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute log
+const Type *LogDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dlog( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute log10
+const Type *Log10DNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dlog10( d ) );
+}
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute exp
+const Type *ExpDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d = t1->getd();
+ if( d < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dexp( d ) );
+}
+
+
+//=============================================================================
+//------------------------------Value------------------------------------------
+// Compute pow
+const Type *PowDNode::Value( PhaseTransform *phase ) const {
+ const Type *t1 = phase->type( in(1) );
+ if( t1 == Type::TOP ) return Type::TOP;
+ if( t1->base() != Type::DoubleCon ) return Type::DOUBLE;
+ const Type *t2 = phase->type( in(2) );
+ if( t2 == Type::TOP ) return Type::TOP;
+ if( t2->base() != Type::DoubleCon ) return Type::DOUBLE;
+ double d1 = t1->getd();
+ double d2 = t2->getd();
+ if( d1 < 0.0 ) return Type::DOUBLE;
+ if( d2 < 0.0 ) return Type::DOUBLE;
+ return TypeD::make( SharedRuntime::dpow( d1, d2 ) );
+}
diff --git a/src/share/vm/opto/subnode.hpp b/src/share/vm/opto/subnode.hpp
new file mode 100644
index 000000000..4992a59c5
--- /dev/null
+++ b/src/share/vm/opto/subnode.hpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright 1997-2006 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+//------------------------------SUBNode----------------------------------------
+// Class SUBTRACTION functionality. This covers all the usual 'subtract'
+// behaviors. Subtract-integer, -float, -double, binary xor, compare-integer,
+// -float, and -double are all inherited from this class. The compare
+// functions behave like subtract functions, except that all negative answers
+// are compressed into -1, and all positive answers compressed to 1.
+class SubNode : public Node {
+public:
+ SubNode( Node *in1, Node *in2 ) : Node(0,in1,in2) {
+ init_class_id(Class_Sub);
+ }
+
+ // Handle algebraic identities here. If we have an identity, return the Node
+ // we are equivalent to. We look for "add of zero" as an identity.
+ virtual Node *Identity( PhaseTransform *phase );
+
+ // Compute a new Type for this node. Basically we just do the pre-check,
+ // then call the virtual sub() to set the type.
+ virtual const Type *Value( PhaseTransform *phase ) const;
+
+ // Supplied function returns the difference of the two inputs.
+ // This also type-checks the inputs for sanity. Guaranteed never to
+ // be passed a TOP or BOTTOM type, these are filtered out by a pre-check.
+ virtual const Type *sub( const Type *, const Type * ) const = 0;
+
+ // Supplied function to return the additive identity type.
+ // This is returned whenever the subtract's inputs are the same.
+ virtual const Type *add_id() const = 0;
+
+};
+
+
+// NOTE: SubINode should be taken away and replaced by add and negate
+//------------------------------SubINode---------------------------------------
+// Subtract 2 integers
+class SubINode : public SubNode {
+public:
+ SubINode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeInt::ZERO; }
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------SubLNode---------------------------------------
+// Subtract 2 longs
+class SubLNode : public SubNode {
+public:
+ SubLNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeLong::ZERO; }
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+// NOTE: SubFPNode should be taken away and replaced by add and negate
+//------------------------------SubFPNode--------------------------------------
+// Subtract 2 floats or doubles
+class SubFPNode : public SubNode {
+protected:
+ SubFPNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {}
+public:
+ const Type *Value( PhaseTransform *phase ) const;
+};
+
+// NOTE: SubFNode should be taken away and replaced by add and negate
+//------------------------------SubFNode---------------------------------------
+// Subtract 2 floats
+class SubFNode : public SubFPNode {
+public:
+ SubFNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeF::ZERO; }
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+// NOTE: SubDNode should be taken away and replaced by add and negate
+//------------------------------SubDNode---------------------------------------
+// Subtract 2 doubles
+class SubDNode : public SubFPNode {
+public:
+ SubDNode( Node *in1, Node *in2 ) : SubFPNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+ const Type *add_id() const { return TypeD::ZERO; }
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------CmpNode---------------------------------------
+// Compare 2 values, returning condition codes (-1, 0 or 1).
+class CmpNode : public SubNode {
+public:
+ CmpNode( Node *in1, Node *in2 ) : SubNode(in1,in2) {
+ init_class_id(Class_Cmp);
+ }
+ virtual Node *Identity( PhaseTransform *phase );
+ const Type *add_id() const { return TypeInt::ZERO; }
+ const Type *bottom_type() const { return TypeInt::CC; }
+ virtual uint ideal_reg() const { return Op_RegFlags; }
+};
+
+//------------------------------CmpINode---------------------------------------
+// Compare 2 signed values, returning condition codes (-1, 0 or 1).
+class CmpINode : public CmpNode {
+public:
+ CmpINode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpUNode---------------------------------------
+// Compare 2 unsigned values (integer or pointer), returning condition codes (-1, 0 or 1).
+class CmpUNode : public CmpNode {
+public:
+ CmpUNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpPNode---------------------------------------
+// Compare 2 pointer values, returning condition codes (-1, 0 or 1).
+class CmpPNode : public CmpNode {
+public:
+ CmpPNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpLNode---------------------------------------
+// Compare 2 long values, returning condition codes (-1, 0 or 1).
+class CmpLNode : public CmpNode {
+public:
+ CmpLNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const;
+};
+
+//------------------------------CmpL3Node--------------------------------------
+// Compare 2 long values, returning integer value (-1, 0 or 1).
+class CmpL3Node : public CmpLNode {
+public:
+ CmpL3Node( Node *in1, Node *in2 ) : CmpLNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------CmpFNode---------------------------------------
+// Compare 2 float values, returning condition codes (-1, 0 or 1).
+// This implements the Java bytecode fcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpFNode : public CmpNode {
+public:
+ CmpFNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
+ const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------CmpF3Node--------------------------------------
+// Compare 2 float values, returning integer value (-1, 0 or 1).
+// This implements the Java bytecode fcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpF3Node : public CmpFNode {
+public:
+ CmpF3Node( Node *in1, Node *in2 ) : CmpFNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------CmpDNode---------------------------------------
+// Compare 2 double values, returning condition codes (-1, 0 or 1).
+// This implements the Java bytecode dcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpDNode : public CmpNode {
+public:
+ CmpDNode( Node *in1, Node *in2 ) : CmpNode(in1,in2) {}
+ virtual int Opcode() const;
+ virtual const Type *sub( const Type *, const Type * ) const { ShouldNotReachHere(); return NULL; }
+ const Type *Value( PhaseTransform *phase ) const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+};
+
+//------------------------------CmpD3Node--------------------------------------
+// Compare 2 double values, returning integer value (-1, 0 or 1).
+// This implements the Java bytecode dcmpl, so unordered returns -1.
+// Operands may not commute.
+class CmpD3Node : public CmpDNode {
+public:
+ CmpD3Node( Node *in1, Node *in2 ) : CmpDNode(in1,in2) {
+ // Since it is not consumed by Bools, it is not really a Cmp.
+ init_class_id(Class_Sub);
+ }
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------BoolTest---------------------------------------
+// Convert condition codes to a boolean test value (0 or -1).
+// We pick the values as 3 bits; the low order 2 bits we compare against the
+// condition codes, the high bit flips the sense of the result.
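+// e.g. eq=0, gt=1 and lt=3 match the low two bits of the condition-code
+// values 0, 1 and -1, while ne=4, le=5 and ge=7 are the same patterns with
+// the complement bit set, i.e. !eq, !gt and !lt.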
+struct BoolTest VALUE_OBJ_CLASS_SPEC {
+ enum mask { eq = 0, ne = 4, le = 5, ge = 7, lt = 3, gt = 1, illegal = 8 };
+ mask _test;
+ BoolTest( mask btm ) : _test(btm) {}
+ const Type *cc2logical( const Type *CC ) const;
+ // Commute the test. I use a small table lookup. The table is created as
+ // a simple char array where each element is the ASCII version of a 'mask'
+ // enum from above.
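+ // e.g. "038147858"[lt=3] is '1' (gt) and "038147858"[le=5] is '7' (ge).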
+ mask commute( ) const { return mask("038147858"[_test]-'0'); }
+ mask negate( ) const { return mask(_test^4); }
+ bool is_canonical( ) const { return (_test == BoolTest::ne || _test == BoolTest::lt || _test == BoolTest::le); }
+#ifndef PRODUCT
+ void dump_on(outputStream *st) const;
+#endif
+};
+
+//------------------------------BoolNode---------------------------------------
+// A Node to convert a Condition Codes to a Logical result.
+class BoolNode : public Node {
+ virtual uint hash() const;
+ virtual uint cmp( const Node &n ) const;
+ virtual uint size_of() const;
+public:
+ const BoolTest _test;
+ BoolNode( Node *cc, BoolTest::mask t): _test(t), Node(0,cc) {
+ init_class_id(Class_Bool);
+ }
+ // Convert an arbitrary int value to a Bool or other suitable predicate.
+ static Node* make_predicate(Node* test_value, PhaseGVN* phase);
+ // Convert self back to an integer value.
+ Node* as_int_value(PhaseGVN* phase);
+ // Invert sense of self, returning new Bool.
+ BoolNode* negate(PhaseGVN* phase);
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ virtual const Type *Value( PhaseTransform *phase ) const;
+ virtual const Type *bottom_type() const { return TypeInt::BOOL; }
+ uint match_edge(uint idx) const { return 0; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+
+ bool is_counted_loop_exit_test();
+#ifndef PRODUCT
+ virtual void dump_spec(outputStream *st) const;
+#endif
+};
+
+//------------------------------AbsNode----------------------------------------
+// Abstract class for absolute value. Mostly used to get a handy wrapper
+// for finding this pattern in the graph.
+class AbsNode : public Node {
+public:
+ AbsNode( Node *value ) : Node(0,value) {}
+};
+
+//------------------------------AbsINode---------------------------------------
+// Absolute value of an integer. Since a naive graph involves control flow, we
+// "match" it in the ideal world (so the control flow can be removed).
+class AbsINode : public AbsNode {
+public:
+ AbsINode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------AbsFNode---------------------------------------
+// Absolute value of a float, a common floating-point idiom with a cheap
+// hardware implementation on most chips. Since a naive graph involves
+// control flow, we "match" it in the ideal world (so the control flow can
+// be removed).
+class AbsFNode : public AbsNode {
+public:
+ AbsFNode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------AbsDNode---------------------------------------
+// Absolute value of a double, a common floating-point idiom with a cheap
+// hardware implementation on most chips. Since a naive graph involves
+// control flow, we "match" it in the ideal world (so the control flow can
+// be removed).
+class AbsDNode : public AbsNode {
+public:
+ AbsDNode( Node *in1 ) : AbsNode(in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------CmpLTMaskNode----------------------------------
+// If p < q, return -1 else return 0. Nice for flow-free idioms.
+class CmpLTMaskNode : public Node {
+public:
+ CmpLTMaskNode( Node *p, Node *q ) : Node(0, p, q) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+
+//------------------------------NegNode----------------------------------------
+class NegNode : public Node {
+public:
+ NegNode( Node *in1 ) : Node(0,in1) {}
+};
+
+//------------------------------NegFNode---------------------------------------
+// Negate a float value. Negating 0.0 returns -0.0, but subtracting from
+// zero returns +0.0 (per JVM spec on 'fneg' bytecode). Since subtraction
+// cannot be used to replace negation, we have to implement negation as an
+// ideal node; note that negation plus addition can replace subtraction.
+class NegFNode : public NegNode {
+public:
+ NegFNode( Node *in1 ) : NegNode(in1) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------NegDNode---------------------------------------
+// Negate a double value. Negating 0.0 returns -0.0, but subtracting from
+// zero returns +0.0 (per JVM spec on 'dneg' bytecode). Since subtraction
+// cannot be used to replace negation, we have to implement negation as an
+// ideal node; note that negation plus addition can replace subtraction.
+class NegDNode : public NegNode {
+public:
+ NegDNode( Node *in1 ) : NegNode(in1) {}
+ virtual int Opcode() const;
+ virtual Node *Ideal(PhaseGVN *phase, bool can_reshape);
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+//------------------------------CosDNode---------------------------------------
+// Cosine of a double
+class CosDNode : public Node {
+public:
+ CosDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------SinDNode---------------------------------------
+// Sine of a double
+class SinDNode : public Node {
+public:
+ SinDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//------------------------------TanDNode---------------------------------------
+// Tangent of a double
+class TanDNode : public Node {
+public:
+ TanDNode(Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+
+//------------------------------AtanDNode--------------------------------------
+// Arc tangent of a double
+class AtanDNode : public Node {
+public:
+ AtanDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};
+
+
+//------------------------------SqrtDNode--------------------------------------
+// Square root of a double
+class SqrtDNode : public Node {
+public:
+ SqrtDNode(Node *c, Node *in1 ) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------ExpDNode---------------------------------------
+// Exponential (e^x) of a double
+class ExpDNode : public Node {
+public:
+ ExpDNode( Node *c, Node *in1 ) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------LogDNode---------------------------------------
+// Log_e of a double
+class LogDNode : public Node {
+public:
+ LogDNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------Log10DNode---------------------------------------
+// Log_10 of a double
+class Log10DNode : public Node {
+public:
+ Log10DNode( Node *in1 ) : Node(0, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//------------------------------PowDNode---------------------------------------
+// Raise a double to a double power
+class PowDNode : public Node {
+public:
+ PowDNode(Node *c, Node *in1, Node *in2 ) : Node(c, in1, in2) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+ virtual const Type *Value( PhaseTransform *phase ) const;
+};
+
+//-------------------------------ReverseBytesINode--------------------------------
+// reverse bytes of an integer
+class ReverseBytesINode : public Node {
+public:
+ ReverseBytesINode(Node *c, Node *in1) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//-------------------------------ReverseBytesLNode--------------------------------
+// reverse bytes of a long
+class ReverseBytesLNode : public Node {
+public:
+ ReverseBytesLNode(Node *c, Node *in1) : Node(c, in1) {}
+ virtual int Opcode() const;
+ const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
diff --git a/src/share/vm/opto/superword.cpp b/src/share/vm/opto/superword.cpp
new file mode 100644
index 000000000..b1467fc9e
--- /dev/null
+++ b/src/share/vm/opto/superword.cpp
@@ -0,0 +1,2025 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_superword.cpp.incl"
+
+//
+// S U P E R W O R D T R A N S F O R M
+//=============================================================================
+
+//------------------------------SuperWord---------------------------
+SuperWord::SuperWord(PhaseIdealLoop* phase) :
+ _phase(phase),
+ _igvn(phase->_igvn),
+ _arena(phase->C->comp_arena()),
+ _packset(arena(), 8, 0, NULL), // packs for the current block
+ _bb_idx(arena(), (int)(1.10 * phase->C->unique()), 0, 0), // node idx to index in bb
+ _block(arena(), 8, 0, NULL), // nodes in current block
+ _data_entry(arena(), 8, 0, NULL), // nodes with all inputs from outside
+ _mem_slice_head(arena(), 8, 0, NULL), // memory slice heads
+ _mem_slice_tail(arena(), 8, 0, NULL), // memory slice tails
+ _node_info(arena(), 8, 0, SWNodeInfo::initial), // info needed per node
+ _align_to_ref(NULL), // memory reference to align vectors to
+ _disjoint_ptrs(arena(), 8, 0, OrderedPair::initial), // runtime disambiguated pointer pairs
+ _dg(_arena), // dependence graph
+ _visited(arena()), // visited node set
+ _post_visited(arena()), // post visited node set
+ _n_idx_list(arena(), 8), // scratch list of (node,index) pairs
+ _stk(arena(), 8, 0, NULL), // scratch stack of nodes
+ _nlist(arena(), 8, 0, NULL), // scratch list of nodes
+ _lpt(NULL), // loop tree node
+ _lp(NULL), // LoopNode
+ _bb(NULL), // basic block
+ _iv(NULL) // induction var
+{}
+
+//------------------------------transform_loop---------------------------
+void SuperWord::transform_loop(IdealLoopTree* lpt) {
+ assert(lpt->_head->is_CountedLoop(), "must be");
+ CountedLoopNode *cl = lpt->_head->as_CountedLoop();
+
+ if (!cl->is_main_loop() ) return; // skip normal, pre, and post loops
+
+ // Check for no control flow in body (other than exit)
+ Node *cl_exit = cl->loopexit();
+ if (cl_exit->in(0) != lpt->_head) return;
+
+ // Check for pre-loop ending with CountedLoopEnd(Bool(Cmp(x,Opaque1(limit))))
+ CountedLoopEndNode* pre_end = get_pre_loop_end(cl);
+ if (pre_end == NULL) return;
+ Node *pre_opaq1 = pre_end->limit();
+ if (pre_opaq1->Opcode() != Op_Opaque1) return;
+
+ // Do vectors exist on this architecture?
+ if (vector_width_in_bytes() == 0) return;
+
+ init(); // initialize data structures
+
+ set_lpt(lpt);
+ set_lp(cl);
+
+ // For now, define one block which is the entire loop body
+ set_bb(cl);
+
+ assert(_packset.length() == 0, "packset must be empty");
+ SLP_extract();
+}
+
+//------------------------------SLP_extract---------------------------
+// Extract the superword level parallelism
+//
+// 1) A reverse post-order of nodes in the block is constructed. By scanning
+// this list from first to last, all definitions are visited before their uses.
+//
+// 2) A point-to-point dependence graph is constructed between memory references.
+// This simplifies the upcoming "independence" checker.
+//
+// 3) The maximum depth in the node graph from the beginning of the block
+// to each node is computed. This is used to prune the graph search
+// in the independence checker.
+//
+// 4) For integer types, the necessary bit width is propagated backwards
+// from stores to allow packed operations on byte, char, and short
+// integers. This reverses the promotion to type "int" that javac
+// did for operations like: char c1,c2,c3; c1 = c2 + c3.
+//
+// 5) One of the memory references is picked to be an aligned vector reference.
+// The pre-loop trip count is adjusted to align this reference in the
+// unrolled body.
+//
+// 6) The initial set of pack pairs is seeded with memory references.
+//
+// 7) The set of pack pairs is extended by following use->def and def->use links.
+//
+// 8) The pairs are combined into vector sized packs.
+//
+// 9) Reorder the memory slices to co-locate members of the memory packs.
+//
+// 10) Generate ideal vector nodes for the final set of packs and where necessary,
+// inserting scalar promotion, vector creation from multiple scalars, and
+// extraction of scalar values from vectors.
+//
+void SuperWord::SLP_extract() {
+
+ // Ready the block
+
+ construct_bb();
+
+ dependence_graph();
+
+ compute_max_depth();
+
+ compute_vector_element_type();
+
+ // Attempt vectorization
+
+ find_adjacent_refs();
+
+ extend_packlist();
+
+ combine_packs();
+
+ construct_my_pack_map();
+
+ filter_packs();
+
+ schedule();
+
+ output();
+}
+
+//------------------------------find_adjacent_refs---------------------------
+// Find the adjacent memory references and create pack pairs for them.
+// This is the initial set of packs that will then be extended by
+// following use->def and def->use links. The align positions are
+// assigned relative to the reference "align_to_ref"
+void SuperWord::find_adjacent_refs() {
+ // Get list of memory operations
+ Node_List memops;
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ if (n->is_Mem() && in_bb(n)) {
+ int align = memory_alignment(n->as_Mem(), 0);
+ if (align != bottom_align) {
+ memops.push(n);
+ }
+ }
+ }
+ if (memops.size() == 0) return;
+
+ // Find a memory reference to align to. The pre-loop trip count
+ // is modified to align this reference to a vector-aligned address
+ find_align_to_ref(memops);
+ if (align_to_ref() == NULL) return;
+
+ SWPointer align_to_ref_p(align_to_ref(), this);
+ int offset = align_to_ref_p.offset_in_bytes();
+ int scale = align_to_ref_p.scale_in_bytes();
+ int vw = vector_width_in_bytes();
+ int stride_sign = (scale * iv_stride()) > 0 ? 1 : -1;
+ int iv_adjustment = (stride_sign * vw - (offset % vw)) % vw;
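+ // e.g. with a 16-byte vector width, offset 4 and a positive stride this
+ // gives iv_adjustment = (16 - 4) % 16 = 12 bytes.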
+
+#ifndef PRODUCT
+ if (TraceSuperWord)
+ tty->print_cr("\noffset = %d iv_adjustment = %d elt_align = %d",
+ offset, iv_adjustment, align_to_ref_p.memory_size());
+#endif
+
+ // Set alignment relative to "align_to_ref"
+ for (int i = memops.size() - 1; i >= 0; i--) {
+ MemNode* s = memops.at(i)->as_Mem();
+ SWPointer p2(s, this);
+ if (p2.comparable(align_to_ref_p)) {
+ int align = memory_alignment(s, iv_adjustment);
+ set_alignment(s, align);
+ } else {
+ memops.remove(i);
+ }
+ }
+
+ // Create initial pack pairs of memory operations
+ for (uint i = 0; i < memops.size(); i++) {
+ Node* s1 = memops.at(i);
+ for (uint j = 0; j < memops.size(); j++) {
+ Node* s2 = memops.at(j);
+ if (s1 != s2 && are_adjacent_refs(s1, s2)) {
+ int align = alignment(s1);
+ if (stmts_can_pack(s1, s2, align)) {
+ Node_List* pair = new Node_List();
+ pair->push(s1);
+ pair->push(s2);
+ _packset.append(pair);
+ }
+ }
+ }
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter find_adjacent_refs");
+ print_packset();
+ }
+#endif
+}
+
+//------------------------------find_align_to_ref---------------------------
+// Find a memory reference to align the loop induction variable to.
+// Looks first at stores then at loads, looking for a memory reference
+// with the largest number of references similar to it.
+void SuperWord::find_align_to_ref(Node_List &memops) {
+ GrowableArray<int> cmp_ct(arena(), memops.size(), memops.size(), 0);
+
+ // Count number of comparable memory ops
+ for (uint i = 0; i < memops.size(); i++) {
+ MemNode* s1 = memops.at(i)->as_Mem();
+ SWPointer p1(s1, this);
+ // Discard if pre loop can't align this reference
+ if (!ref_is_alignable(p1)) {
+ *cmp_ct.adr_at(i) = 0;
+ continue;
+ }
+ for (uint j = i+1; j < memops.size(); j++) {
+ MemNode* s2 = memops.at(j)->as_Mem();
+ if (isomorphic(s1, s2)) {
+ SWPointer p2(s2, this);
+ if (p1.comparable(p2)) {
+ (*cmp_ct.adr_at(i))++;
+ (*cmp_ct.adr_at(j))++;
+ }
+ }
+ }
+ }
+
+ // Find Store (or Load) with the greatest number of "comparable" references
+ int max_ct = 0;
+ int max_idx = -1;
+ int min_size = max_jint;
+ int min_iv_offset = max_jint;
+ for (uint j = 0; j < memops.size(); j++) {
+ MemNode* s = memops.at(j)->as_Mem();
+ if (s->is_Store()) {
+ SWPointer p(s, this);
+ if (cmp_ct.at(j) > max_ct ||
+ (cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (data_size(s) == min_size &&
+ p.offset_in_bytes() < min_iv_offset)))) {
+ max_ct = cmp_ct.at(j);
+ max_idx = j;
+ min_size = data_size(s);
+ min_iv_offset = p.offset_in_bytes();
+ }
+ }
+ }
+ // If no stores, look at loads
+ if (max_ct == 0) {
+ for (uint j = 0; j < memops.size(); j++) {
+ MemNode* s = memops.at(j)->as_Mem();
+ if (s->is_Load()) {
+ SWPointer p(s, this);
+ if (cmp_ct.at(j) > max_ct ||
+ (cmp_ct.at(j) == max_ct && (data_size(s) < min_size ||
+ (data_size(s) == min_size &&
+ p.offset_in_bytes() < min_iv_offset)))) {
+ max_ct = cmp_ct.at(j);
+ max_idx = j;
+ min_size = data_size(s);
+ min_iv_offset = p.offset_in_bytes();
+ }
+ }
+ }
+ }
+
+ if (max_ct > 0)
+ set_align_to_ref(memops.at(max_idx)->as_Mem());
+
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("\nVector memops after find_align_to_refs");
+ for (uint i = 0; i < memops.size(); i++) {
+ MemNode* s = memops.at(i)->as_Mem();
+ s->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------ref_is_alignable---------------------------
+// Can the preloop align the reference to position zero in the vector?
+bool SuperWord::ref_is_alignable(SWPointer& p) {
+ if (!p.has_iv()) {
+ return true; // no induction variable
+ }
+ CountedLoopEndNode* pre_end = get_pre_loop_end(lp()->as_CountedLoop());
+ assert(pre_end->stride_is_con(), "pre loop stride is constant");
+ int preloop_stride = pre_end->stride_con();
+
+ int span = preloop_stride * p.scale_in_bytes();
+
+ // Stride one accesses are alignable.
+ if (ABS(span) == p.memory_size())
+ return true;
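+ // (each pre-loop iteration then moves the reference by exactly one element,
+ // so a suitable pre-loop trip count can reach vector alignment)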
+
+ // If initial offset from start of object is computable,
+ // compute alignment within the vector.
+ int vw = vector_width_in_bytes();
+ if (vw % span == 0) {
+ Node* init_nd = pre_end->init_trip();
+ if (init_nd->is_Con() && p.invar() == NULL) {
+ int init = init_nd->bottom_type()->is_int()->get_con();
+
+ int init_offset = init * p.scale_in_bytes() + p.offset_in_bytes();
+ assert(init_offset >= 0, "positive offset from object start");
+
+ if (span > 0) {
+ return (vw - (init_offset % vw)) % span == 0;
+ } else {
+ assert(span < 0, "nonzero stride * scale");
+ return (init_offset % vw) % -span == 0;
+ }
+ }
+ }
+ return false;
+}
+
+//---------------------------dependence_graph---------------------------
+// Construct dependency graph.
+// Add dependence edges to load/store nodes for memory dependence
+// A.out()->DependNode.in(1) and DependNode.out()->B.prec(x)
+void SuperWord::dependence_graph() {
+ // First, assign a dependence node to each memory node
+ for (int i = 0; i < _block.length(); i++ ) {
+ Node *n = _block.at(i);
+ if (n->is_Mem() || (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
+ _dg.make_node(n);
+ }
+ }
+
+ // For each memory slice, create the dependences
+ for (int i = 0; i < _mem_slice_head.length(); i++) {
+ Node* n = _mem_slice_head.at(i);
+ Node* n_tail = _mem_slice_tail.at(i);
+
+ // Get slice in predecessor order (last is first)
+ mem_slice_preds(n_tail, n, _nlist);
+
+ // Make the slice dependent on the root
+ DepMem* slice = _dg.dep(n);
+ _dg.make_edge(_dg.root(), slice);
+
+ // Create a sink for the slice
+ DepMem* slice_sink = _dg.make_node(NULL);
+ _dg.make_edge(slice_sink, _dg.tail());
+
+ // Now visit each pair of memory ops, creating the edges
+ for (int j = _nlist.length() - 1; j >= 0 ; j--) {
+ Node* s1 = _nlist.at(j);
+
+ // If no dependency yet, use slice
+ if (_dg.dep(s1)->in_cnt() == 0) {
+ _dg.make_edge(slice, s1);
+ }
+ SWPointer p1(s1->as_Mem(), this);
+ bool sink_dependent = true;
+ for (int k = j - 1; k >= 0; k--) {
+ Node* s2 = _nlist.at(k);
+ if (s1->is_Load() && s2->is_Load())
+ continue;
+ SWPointer p2(s2->as_Mem(), this);
+
+ int cmp = p1.cmp(p2);
+ if (SuperWordRTDepCheck &&
+ p1.base() != p2.base() && p1.valid() && p2.valid()) {
+ // Create a runtime check to disambiguate
+ OrderedPair pp(p1.base(), p2.base());
+ _disjoint_ptrs.append_if_missing(pp);
+ } else if (!SWPointer::not_equal(cmp)) {
+ // Possibly same address
+ _dg.make_edge(s1, s2);
+ sink_dependent = false;
+ }
+ }
+ if (sink_dependent) {
+ _dg.make_edge(s1, slice_sink);
+ }
+ }
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nDependence graph for slice: %d", n->_idx);
+ for (int q = 0; q < _nlist.length(); q++) {
+ _dg.print(_nlist.at(q));
+ }
+ tty->cr();
+ }
+#endif
+ _nlist.clear();
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\ndisjoint_ptrs: %s", _disjoint_ptrs.length() > 0 ? "" : "NONE");
+ for (int r = 0; r < _disjoint_ptrs.length(); r++) {
+ _disjoint_ptrs.at(r).print();
+ tty->cr();
+ }
+ tty->cr();
+ }
+#endif
+}
+
+//---------------------------mem_slice_preds---------------------------
+// Return a memory slice (node list) in predecessor order starting at "start"
+void SuperWord::mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds) {
+ assert(preds.length() == 0, "start empty");
+ Node* n = start;
+ Node* prev = NULL;
+ while (true) {
+ assert(in_bb(n), "must be in block");
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node* out = n->fast_out(i);
+ if (out->is_Load()) {
+ if (in_bb(out)) {
+ preds.push(out);
+ }
+ } else {
+ // FIXME
+ if (out->is_MergeMem() && !in_bb(out)) {
+ // Either unrolling is causing a memory edge not to disappear,
+ // or need to run igvn.optimize() again before SLP
+ } else if (out->is_Phi() && out->bottom_type() == Type::MEMORY && !in_bb(out)) {
+ // Ditto. Not sure what else to check further.
+ } else if (out->Opcode() == Op_StoreCM && out->in(4) == n) {
+ // StoreCM has an input edge used as a precedence edge.
+ // Maybe an issue when oop stores are vectorized.
+ } else {
+ assert(out == prev || prev == NULL, "no branches off of store slice");
+ }
+ }
+ }
+ if (n == stop) break;
+ preds.push(n);
+ prev = n;
+ n = n->in(MemNode::Memory);
+ }
+}
+
+//------------------------------stmts_can_pack---------------------------
+// Can s1 and s2 be in a pack with s1 immediately preceding s2 and
+// s1 aligned at "align"
+bool SuperWord::stmts_can_pack(Node* s1, Node* s2, int align) {
+ if (isomorphic(s1, s2)) {
+ if (independent(s1, s2)) {
+ if (!exists_at(s1, 0) && !exists_at(s2, 1)) {
+ if (!s1->is_Mem() || are_adjacent_refs(s1, s2)) {
+ int s1_align = alignment(s1);
+ int s2_align = alignment(s2);
+ if (s1_align == top_align || s1_align == align) {
+ if (s2_align == top_align || s2_align == align + data_size(s1)) {
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//------------------------------exists_at---------------------------
+// Does s exist in a pack at position pos?
+bool SuperWord::exists_at(Node* s, uint pos) {
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ if (p->at(pos) == s) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------are_adjacent_refs---------------------------
+// Is s1 immediately before s2 in memory?
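+// For example, loads of a[i] and a[i+1] from an int array are adjacent:
+// their offsets differ by exactly data_size(s1) == 4 bytes.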
+bool SuperWord::are_adjacent_refs(Node* s1, Node* s2) {
+ if (!s1->is_Mem() || !s2->is_Mem()) return false;
+ if (!in_bb(s1) || !in_bb(s2)) return false;
+ // FIXME - co_locate_pack fails on Stores in different mem-slices, so
+ // only pack memops that are in the same alias set until that's fixed.
+ if (_phase->C->get_alias_index(s1->as_Mem()->adr_type()) !=
+ _phase->C->get_alias_index(s2->as_Mem()->adr_type()))
+ return false;
+ SWPointer p1(s1->as_Mem(), this);
+ SWPointer p2(s2->as_Mem(), this);
+ if (p1.base() != p2.base() || !p1.comparable(p2)) return false;
+ int diff = p2.offset_in_bytes() - p1.offset_in_bytes();
+ return diff == data_size(s1);
+}
+
+//------------------------------isomorphic---------------------------
+// Are s1 and s2 similar?
+bool SuperWord::isomorphic(Node* s1, Node* s2) {
+ if (s1->Opcode() != s2->Opcode()) return false;
+ if (s1->req() != s2->req()) return false;
+ if (s1->in(0) != s2->in(0)) return false;
+ if (velt_type(s1) != velt_type(s2)) return false;
+ return true;
+}
+
+//------------------------------independent---------------------------
+// Is there no data path from s1 to s2 or s2 to s1?
+bool SuperWord::independent(Node* s1, Node* s2) {
+ // assert(s1->Opcode() == s2->Opcode(), "check isomorphic first");
+ int d1 = depth(s1);
+ int d2 = depth(s2);
+ if (d1 == d2) return s1 != s2;
+ Node* deep = d1 > d2 ? s1 : s2;
+ Node* shallow = d1 > d2 ? s2 : s1;
+
+ visited_clear();
+
+ return independent_path(shallow, deep);
+}
+
+//------------------------------independent_path------------------------------
+// Helper for independent
+bool SuperWord::independent_path(Node* shallow, Node* deep, uint dp) {
+ if (dp >= 1000) return false; // stop deep recursion
+ visited_set(deep);
+ int shal_depth = depth(shallow);
+ assert(shal_depth <= depth(deep), "must be");
+ for (DepPreds preds(deep, _dg); !preds.done(); preds.next()) {
+ Node* pred = preds.current();
+ if (in_bb(pred) && !visited_test(pred)) {
+ if (shallow == pred) {
+ return false;
+ }
+ if (shal_depth < depth(pred) && !independent_path(shallow, pred, dp+1)) {
+ return false;
+ }
+ }
+ }
+ return true;
+}
+
+//------------------------------set_alignment---------------------------
+void SuperWord::set_alignment(Node* s1, Node* s2, int align) {
+ set_alignment(s1, align);
+ set_alignment(s2, align + data_size(s1));
+}
+
+//------------------------------data_size---------------------------
+int SuperWord::data_size(Node* s) {
+ const Type* t = velt_type(s);
+ BasicType bt = t->array_element_basic_type();
+ int bsize = type2aelembytes[bt];
+ assert(bsize != 0, "valid size");
+ return bsize;
+}
+
+//------------------------------extend_packlist---------------------------
+// Extend packset by following use->def and def->use links from pack members.
+void SuperWord::extend_packlist() {
+ bool changed;
+ do {
+ changed = false;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ changed |= follow_use_defs(p);
+ changed |= follow_def_uses(p);
+ }
+ } while (changed);
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter extend_packlist");
+ print_packset();
+ }
+#endif
+}
+
+//------------------------------follow_use_defs---------------------------
+// Extend the packset by visiting operand definitions of nodes in pack p
+bool SuperWord::follow_use_defs(Node_List* p) {
+ Node* s1 = p->at(0);
+ Node* s2 = p->at(1);
+ assert(p->size() == 2, "just checking");
+ assert(s1->req() == s2->req(), "just checking");
+ assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
+
+ if (s1->is_Load()) return false;
+
+ int align = alignment(s1);
+ bool changed = false;
+ int start = s1->is_Store() ? MemNode::ValueIn : 1;
+ int end = s1->is_Store() ? MemNode::ValueIn+1 : s1->req();
+ for (int j = start; j < end; j++) {
+ Node* t1 = s1->in(j);
+ Node* t2 = s2->in(j);
+ if (!in_bb(t1) || !in_bb(t2))
+ continue;
+ if (stmts_can_pack(t1, t2, align)) {
+ if (est_savings(t1, t2) >= 0) {
+ Node_List* pair = new Node_List();
+ pair->push(t1);
+ pair->push(t2);
+ _packset.append(pair);
+ set_alignment(t1, t2, align);
+ changed = true;
+ }
+ }
+ }
+ return changed;
+}
+
+//------------------------------follow_def_uses---------------------------
+// Extend the packset by visiting uses of nodes in pack p
+bool SuperWord::follow_def_uses(Node_List* p) {
+ bool changed = false;
+ Node* s1 = p->at(0);
+ Node* s2 = p->at(1);
+ assert(p->size() == 2, "just checking");
+ assert(s1->req() == s2->req(), "just checking");
+ assert(alignment(s1) + data_size(s1) == alignment(s2), "just checking");
+
+ if (s1->is_Store()) return false;
+
+ int align = alignment(s1);
+ int savings = -1;
+ Node* u1 = NULL;
+ Node* u2 = NULL;
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* t1 = s1->fast_out(i);
+ if (!in_bb(t1)) continue;
+ for (DUIterator_Fast jmax, j = s2->fast_outs(jmax); j < jmax; j++) {
+ Node* t2 = s2->fast_out(j);
+ if (!in_bb(t2)) continue;
+ if (!opnd_positions_match(s1, t1, s2, t2))
+ continue;
+ if (stmts_can_pack(t1, t2, align)) {
+ int my_savings = est_savings(t1, t2);
+ if (my_savings > savings) {
+ savings = my_savings;
+ u1 = t1;
+ u2 = t2;
+ }
+ }
+ }
+ }
+ if (savings >= 0) {
+ Node_List* pair = new Node_List();
+ pair->push(u1);
+ pair->push(u2);
+ _packset.append(pair);
+ set_alignment(u1, u2, align);
+ changed = true;
+ }
+ return changed;
+}
+
+//---------------------------opnd_positions_match-------------------------
+// Is the use of d1 in u1 at the same operand position as d2 in u2?
+bool SuperWord::opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2) {
+ uint ct = u1->req();
+ if (ct != u2->req()) return false;
+ uint i1 = 0;
+ uint i2 = 0;
+ do {
+ for (i1++; i1 < ct; i1++) if (u1->in(i1) == d1) break;
+ for (i2++; i2 < ct; i2++) if (u2->in(i2) == d2) break;
+ if (i1 != i2) {
+ return false;
+ }
+ } while (i1 < ct);
+ return true;
+}
+
+//------------------------------est_savings---------------------------
+// Estimate the savings from executing s1 and s2 as a pack
+int SuperWord::est_savings(Node* s1, Node* s2) {
+ int save = 2 - 1; // 2 scalar operations become 1 packed instruction
+
+ // inputs
+ for (uint i = 1; i < s1->req(); i++) {
+ Node* x1 = s1->in(i);
+ Node* x2 = s2->in(i);
+ if (x1 != x2) {
+ if (are_adjacent_refs(x1, x2)) {
+ save += adjacent_profit(x1, x2);
+ } else if (!in_packset(x1, x2)) {
+ save -= pack_cost(2);
+ } else {
+ save += unpack_cost(2);
+ }
+ }
+ }
+
+ // uses of result
+ uint ct = 0;
+ for (DUIterator_Fast imax, i = s1->fast_outs(imax); i < imax; i++) {
+ Node* s1_use = s1->fast_out(i);
+ for (int j = 0; j < _packset.length(); j++) {
+ Node_List* p = _packset.at(j);
+ if (p->at(0) == s1_use) {
+ for (DUIterator_Fast kmax, k = s2->fast_outs(kmax); k < kmax; k++) {
+ Node* s2_use = s2->fast_out(k);
+ if (p->at(p->size()-1) == s2_use) {
+ ct++;
+ if (are_adjacent_refs(s1_use, s2_use)) {
+ save += adjacent_profit(s1_use, s2_use);
+ }
+ }
+ }
+ }
+ }
+ }
+
+ if (ct < s1->outcnt()) save += unpack_cost(1);
+ if (ct < s2->outcnt()) save += unpack_cost(1);
+
+ return save;
+}
+
+//------------------------------costs---------------------------
+int SuperWord::adjacent_profit(Node* s1, Node* s2) { return 2; }
+int SuperWord::pack_cost(int ct) { return ct; }
+int SuperWord::unpack_cost(int ct) { return ct; }
+
+//------------------------------combine_packs---------------------------
+// Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
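+// For example, packs <a,b> and <b,c> combine into <a,b,c>; the consumed
+// pack's slot is set to NULL and compacted away afterwards.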
+void SuperWord::combine_packs() {
+ bool changed;
+ do {
+ changed = false;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 == NULL) continue;
+ for (int j = 0; j < _packset.length(); j++) {
+ Node_List* p2 = _packset.at(j);
+ if (p2 == NULL) continue;
+ if (p1->at(p1->size()-1) == p2->at(0)) {
+ for (uint k = 1; k < p2->size(); k++) {
+ p1->push(p2->at(k));
+ }
+ _packset.at_put(j, NULL);
+ changed = true;
+ }
+ }
+ }
+ } while (changed);
+
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* p1 = _packset.at(i);
+ if (p1 == NULL) {
+ _packset.remove_at(i);
+ }
+ }
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter combine_packs");
+ print_packset();
+ }
+#endif
+}
+
+//-----------------------------construct_my_pack_map--------------------------
+// Construct the map from nodes to packs. Only valid after the
+// point where a node is only in one pack (after combine_packs).
+void SuperWord::construct_my_pack_map() {
+ Node_List* rslt = NULL;
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ for (uint j = 0; j < p->size(); j++) {
+ Node* s = p->at(j);
+ assert(my_pack(s) == NULL, "only in one pack");
+ set_my_pack(s, p);
+ }
+ }
+}
+
+//------------------------------filter_packs---------------------------
+// Remove packs that are not implemented or not profitable.
+void SuperWord::filter_packs() {
+
+ // Remove packs that are not implemented
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* pk = _packset.at(i);
+ bool impl = implemented(pk);
+ if (!impl) {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("Unimplemented");
+ pk->at(0)->dump();
+ }
+#endif
+ remove_pack_at(i);
+ }
+ }
+
+ // Remove packs that are not profitable
+ bool changed;
+ do {
+ changed = false;
+ for (int i = _packset.length() - 1; i >= 0; i--) {
+ Node_List* pk = _packset.at(i);
+ bool prof = profitable(pk);
+ if (!prof) {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ tty->print_cr("Unprofitable");
+ pk->at(0)->dump();
+ }
+#endif
+ remove_pack_at(i);
+ changed = true;
+ }
+ }
+ } while (changed);
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ tty->print_cr("\nAfter filter_packs");
+ print_packset();
+ tty->cr();
+ }
+#endif
+}
+
+//------------------------------implemented---------------------------
+// Can code be generated for pack p?
+bool SuperWord::implemented(Node_List* p) {
+ Node* p0 = p->at(0);
+ int vopc = VectorNode::opcode(p0->Opcode(), p->size(), velt_type(p0));
+ return vopc > 0 && Matcher::has_match_rule(vopc);
+}
+
+//------------------------------profitable---------------------------
+// For pack p, are all operands and all uses (within the block) vector?
+bool SuperWord::profitable(Node_List* p) {
+ Node* p0 = p->at(0);
+ uint start, end;
+ vector_opd_range(p0, &start, &end);
+
+ // Return false if some input is not vector and inside block
+ for (uint i = start; i < end; i++) {
+ if (!is_vector_use(p0, i)) {
+ // For now, return false unless this is the scalar promotion case
+ // (all inputs are the same). Later, implement PackNode and allow
+ // differing, non-vector inputs (maybe just the ones from outside the block).
+ Node* p0_def = p0->in(i);
+ for (uint j = 1; j < p->size(); j++) {
+ Node* use = p->at(j);
+ Node* def = use->in(i);
+ if (p0_def != def)
+ return false;
+ }
+ }
+ }
+ if (!p0->is_Store()) {
+ // For now, return false if not all uses are vector.
+ // Later, implement ExtractNode and allow non-vector uses (maybe
+ // just the ones outside the block.)
+ for (uint i = 0; i < p->size(); i++) {
+ Node* def = p->at(i);
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node* use = def->fast_out(j);
+ for (uint k = 0; k < use->req(); k++) {
+ Node* n = use->in(k);
+ if (def == n) {
+ if (!is_vector_use(use, k)) {
+ return false;
+ }
+ }
+ }
+ }
+ }
+ }
+ return true;
+}
+
+//------------------------------schedule---------------------------
+// Adjust the memory graph for the packed operations
+void SuperWord::schedule() {
+
+ // Co-locate in the memory graph the members of each memory pack
+ for (int i = 0; i < _packset.length(); i++) {
+ co_locate_pack(_packset.at(i));
+ }
+}
+
+//------------------------------co_locate_pack---------------------------
+// Within a pack, move stores down to the last executed store,
+// and move loads up to the first executed load.
+void SuperWord::co_locate_pack(Node_List* pk) {
+ if (pk->at(0)->is_Store()) {
+ // Push Stores down towards last executed pack member
+ MemNode* first = executed_first(pk)->as_Mem();
+ MemNode* last = executed_last(pk)->as_Mem();
+ MemNode* insert_pt = last;
+ MemNode* current = last->in(MemNode::Memory)->as_Mem();
+ while (true) {
+ assert(in_bb(current), "stay in block");
+ Node* my_mem = current->in(MemNode::Memory);
+ if (in_pack(current, pk)) {
+ // Forward users of my memory state to my input memory state
+ _igvn.hash_delete(current);
+ _igvn.hash_delete(my_mem);
+ for (DUIterator i = current->outs(); current->has_out(i); i++) {
+ Node* use = current->out(i);
+ if (use->is_Mem()) {
+ assert(use->in(MemNode::Memory) == current, "must be");
+ _igvn.hash_delete(use);
+ use->set_req(MemNode::Memory, my_mem);
+ _igvn._worklist.push(use);
+ --i; // deleted this edge; rescan position
+ }
+ }
+ // put current immediately before insert_pt
+ current->set_req(MemNode::Memory, insert_pt->in(MemNode::Memory));
+ _igvn.hash_delete(insert_pt);
+ insert_pt->set_req(MemNode::Memory, current);
+ _igvn._worklist.push(insert_pt);
+ _igvn._worklist.push(current);
+ insert_pt = current;
+ }
+ if (current == first) break;
+ current = my_mem->as_Mem();
+ }
+ } else if (pk->at(0)->is_Load()) {
+ // Pull Loads up towards first executed pack member
+ LoadNode* first = executed_first(pk)->as_Load();
+ Node* first_mem = first->in(MemNode::Memory);
+ _igvn.hash_delete(first_mem);
+ // Give each load same memory state as first
+ for (uint i = 0; i < pk->size(); i++) {
+ LoadNode* ld = pk->at(i)->as_Load();
+ _igvn.hash_delete(ld);
+ ld->set_req(MemNode::Memory, first_mem);
+ _igvn._worklist.push(ld);
+ }
+ }
+}
+
+//------------------------------output---------------------------
+// Convert packs into vector node operations
+void SuperWord::output() {
+ if (_packset.length() == 0) return;
+
+ // MUST ENSURE main loop's initial value is properly aligned:
+ // (iv_initial_value + min_iv_offset) % vector_width_in_bytes() == 0
+
+ align_initial_loop_index(align_to_ref());
+
+ // Insert extract (unpack) operations for scalar uses
+ for (int i = 0; i < _packset.length(); i++) {
+ insert_extracts(_packset.at(i));
+ }
+
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ Node_List* p = my_pack(n);
+ if (p && n == executed_last(p)) {
+ uint vlen = p->size();
+ Node* vn = NULL;
+ Node* low_adr = p->at(0);
+ Node* first = executed_first(p);
+ if (n->is_Load()) {
+ int opc = n->Opcode();
+ Node* ctl = n->in(MemNode::Control);
+ Node* mem = first->in(MemNode::Memory);
+ Node* adr = low_adr->in(MemNode::Address);
+ const TypePtr* atyp = n->adr_type();
+ vn = VectorLoadNode::make(_phase->C, opc, ctl, mem, adr, atyp, vlen);
+
+ } else if (n->is_Store()) {
+ // Promote value to be stored to vector
+ VectorNode* val = vector_opd(p, MemNode::ValueIn);
+
+ int opc = n->Opcode();
+ Node* ctl = n->in(MemNode::Control);
+ Node* mem = first->in(MemNode::Memory);
+ Node* adr = low_adr->in(MemNode::Address);
+ const TypePtr* atyp = n->adr_type();
+ vn = VectorStoreNode::make(_phase->C, opc, ctl, mem, adr, atyp, val, vlen);
+
+ } else if (n->req() == 3) {
+ // Promote operands to vector
+ Node* in1 = vector_opd(p, 1);
+ Node* in2 = vector_opd(p, 2);
+ vn = VectorNode::make(_phase->C, n->Opcode(), in1, in2, vlen, velt_type(n));
+
+ } else {
+ ShouldNotReachHere();
+ }
+
+ _phase->_igvn.register_new_node_with_optimizer(vn);
+ _phase->set_ctrl(vn, _phase->get_ctrl(p->at(0)));
+ for (uint j = 0; j < p->size(); j++) {
+ Node* pm = p->at(j);
+ _igvn.hash_delete(pm);
+ _igvn.subsume_node(pm, vn);
+ }
+ _igvn._worklist.push(vn);
+ }
+ }
+}
+
+//------------------------------vector_opd---------------------------
+// Create a vector operand for the nodes in pack p for operand: in(opd_idx)
+VectorNode* SuperWord::vector_opd(Node_List* p, int opd_idx) {
+ Node* p0 = p->at(0);
+ uint vlen = p->size();
+ Node* opd = p0->in(opd_idx);
+
+ bool same_opd = true;
+ for (uint i = 1; i < vlen; i++) {
+ Node* pi = p->at(i);
+ Node* in = pi->in(opd_idx);
+ if (opd != in) {
+ same_opd = false;
+ break;
+ }
+ }
+
+ if (same_opd) {
+ if (opd->is_Vector()) {
+ return (VectorNode*)opd; // input is matching vector
+ }
+ // Convert scalar input to vector. Use p0's type because its
+ // container may be smaller than the operand's container.
+ const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
+ const Type* p0_t = velt_type(p0);
+ if (p0_t->higher_equal(opd_t)) opd_t = p0_t;
+ VectorNode* vn = VectorNode::scalar2vector(_phase->C, opd, vlen, opd_t);
+
+ _phase->_igvn.register_new_node_with_optimizer(vn);
+ _phase->set_ctrl(vn, _phase->get_ctrl(opd));
+ return vn;
+ }
+
+ // Insert pack operation
+ const Type* opd_t = velt_type(!in_bb(opd) ? p0 : opd);
+ PackNode* pk = PackNode::make(_phase->C, opd, opd_t);
+
+ for (uint i = 1; i < vlen; i++) {
+ Node* pi = p->at(i);
+ Node* in = pi->in(opd_idx);
+ assert(my_pack(in) == NULL, "Should already have been unpacked");
+ assert(opd_t == velt_type(!in_bb(in) ? pi : in), "all same type");
+ pk->add_opd(in);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(pk);
+ _phase->set_ctrl(pk, _phase->get_ctrl(opd));
+ return pk;
+}
+
+//------------------------------insert_extracts---------------------------
+// If a use of pack p is not a vector use, then replace the
+// use with an extract operation.
+void SuperWord::insert_extracts(Node_List* p) {
+ if (p->at(0)->is_Store()) return;
+ assert(_n_idx_list.is_empty(), "empty (node,index) list");
+
+ // Inspect each use of each pack member. For each use that is
+ // not a vector use, replace the use with an extract operation.
+
+ for (uint i = 0; i < p->size(); i++) {
+ Node* def = p->at(i);
+ for (DUIterator_Fast jmax, j = def->fast_outs(jmax); j < jmax; j++) {
+ Node* use = def->fast_out(j);
+ for (uint k = 0; k < use->req(); k++) {
+ Node* n = use->in(k);
+ if (def == n) {
+ if (!is_vector_use(use, k)) {
+ _n_idx_list.push(use, k);
+ }
+ }
+ }
+ }
+ }
+
+ while (_n_idx_list.is_nonempty()) {
+ Node* use = _n_idx_list.node();
+ int idx = _n_idx_list.index();
+ _n_idx_list.pop();
+ Node* def = use->in(idx);
+
+ // Insert extract operation
+ _igvn.hash_delete(def);
+ _igvn.hash_delete(use);
+ int def_pos = alignment(def) / data_size(def);
+ const Type* def_t = velt_type(def);
+
+ Node* ex = ExtractNode::make(_phase->C, def, def_pos, def_t);
+ _phase->_igvn.register_new_node_with_optimizer(ex);
+ _phase->set_ctrl(ex, _phase->get_ctrl(def));
+ use->set_req(idx, ex);
+ _igvn._worklist.push(def);
+ _igvn._worklist.push(use);
+
+ bb_insert_after(ex, bb_idx(def));
+ set_velt_type(ex, def_t);
+ }
+}
+
+//------------------------------is_vector_use---------------------------
+// Is use->in(u_idx) a vector use?
+bool SuperWord::is_vector_use(Node* use, int u_idx) {
+ Node_List* u_pk = my_pack(use);
+ if (u_pk == NULL) return false;
+ Node* def = use->in(u_idx);
+ Node_List* d_pk = my_pack(def);
+ if (d_pk == NULL) {
+ // check for scalar promotion
+ Node* n = u_pk->at(0)->in(u_idx);
+ for (uint i = 1; i < u_pk->size(); i++) {
+ if (u_pk->at(i)->in(u_idx) != n) return false;
+ }
+ return true;
+ }
+ if (u_pk->size() != d_pk->size())
+ return false;
+ for (uint i = 0; i < u_pk->size(); i++) {
+ Node* ui = u_pk->at(i);
+ Node* di = d_pk->at(i);
+ if (ui->in(u_idx) != di || alignment(ui) != alignment(di))
+ return false;
+ }
+ return true;
+}
+
+//------------------------------construct_bb---------------------------
+// Construct reverse postorder list of block members
+void SuperWord::construct_bb() {
+ Node* entry = bb();
+
+ assert(_stk.length() == 0, "stk is empty");
+ assert(_block.length() == 0, "block is empty");
+ assert(_data_entry.length() == 0, "data_entry is empty");
+ assert(_mem_slice_head.length() == 0, "mem_slice_head is empty");
+ assert(_mem_slice_tail.length() == 0, "mem_slice_tail is empty");
+
+ // Find non-control nodes with no inputs from within block,
+ // create a temporary map from node _idx to bb_idx for use
+ // by the visited and post_visited sets,
+ // and count number of nodes in block.
+ int bb_ct = 0;
+ for (uint i = 0; i < lpt()->_body.size(); i++ ) {
+ Node *n = lpt()->_body.at(i);
+ set_bb_idx(n, i); // Create a temporary map
+ if (in_bb(n)) {
+ bb_ct++;
+ if (!n->is_CFG()) {
+ bool found = false;
+ for (uint j = 0; j < n->req(); j++) {
+ Node* def = n->in(j);
+ if (def && in_bb(def)) {
+ found = true;
+ break;
+ }
+ }
+ if (!found) {
+ assert(n != entry, "can't be entry");
+ _data_entry.push(n);
+ }
+ }
+ }
+ }
+
+ // Find memory slices (head and tail)
+ for (DUIterator_Fast imax, i = lp()->fast_outs(imax); i < imax; i++) {
+ Node *n = lp()->fast_out(i);
+ if (in_bb(n) && (n->is_Phi() && n->bottom_type() == Type::MEMORY)) {
+ Node* n_tail = n->in(LoopNode::LoopBackControl);
+ _mem_slice_head.push(n);
+ _mem_slice_tail.push(n_tail);
+ }
+ }
+
+ // Create an RPO list of nodes in block
+
+ visited_clear();
+ post_visited_clear();
+
+ // Push all non-control nodes with no inputs from within block, then control entry
+ for (int j = 0; j < _data_entry.length(); j++) {
+ Node* n = _data_entry.at(j);
+ visited_set(n);
+ _stk.push(n);
+ }
+ visited_set(entry);
+ _stk.push(entry);
+
+ // Do a depth first walk over out edges
+ int rpo_idx = bb_ct - 1;
+ int size;
+ while ((size = _stk.length()) > 0) {
+ Node* n = _stk.top(); // Leave node on stack
+ if (!visited_test_set(n)) {
+ // forward arc in graph
+ } else if (!post_visited_test(n)) {
+ // cross or back arc
+ for (DUIterator_Fast imax, i = n->fast_outs(imax); i < imax; i++) {
+ Node *use = n->fast_out(i);
+ if (in_bb(use) && !visited_test(use) &&
+ // Don't go around backedge
+ (!use->is_Phi() || n == entry)) {
+ _stk.push(use);
+ }
+ }
+ if (_stk.length() == size) {
+ // There were no additional uses, post visit node now
+ _stk.pop(); // Remove node from stack
+ assert(rpo_idx >= 0, "");
+ _block.at_put_grow(rpo_idx, n);
+ rpo_idx--;
+ post_visited_set(n);
+ assert(rpo_idx >= 0 || _stk.is_empty(), "");
+ }
+ } else {
+ _stk.pop(); // Remove post-visited node from stack
+ }
+ }
+
+ // Create real map of block indices for nodes
+ for (int j = 0; j < _block.length(); j++) {
+ Node* n = _block.at(j);
+ set_bb_idx(n, j);
+ }
+
+ initialize_bb(); // Ensure extra info is allocated.
+
+#ifndef PRODUCT
+ if (TraceSuperWord) {
+ print_bb();
+ tty->print_cr("\ndata entry nodes: %s", _data_entry.length() > 0 ? "" : "NONE");
+ for (int m = 0; m < _data_entry.length(); m++) {
+ tty->print("%3d ", m);
+ _data_entry.at(m)->dump();
+ }
+ tty->print_cr("\nmemory slices: %s", _mem_slice_head.length() > 0 ? "" : "NONE");
+ for (int m = 0; m < _mem_slice_head.length(); m++) {
+ tty->print("%3d ", m); _mem_slice_head.at(m)->dump();
+ tty->print(" "); _mem_slice_tail.at(m)->dump();
+ }
+ }
+#endif
+ assert(rpo_idx == -1 && bb_ct == _block.length(), "all block members found");
+}
+
+//------------------------------initialize_bb---------------------------
+// Initialize per node info
+void SuperWord::initialize_bb() {
+ Node* last = _block.at(_block.length() - 1);
+ grow_node_info(bb_idx(last));
+}
+
+//------------------------------bb_insert_after---------------------------
+// Insert n into block after pos
+void SuperWord::bb_insert_after(Node* n, int pos) {
+ int n_pos = pos + 1;
+ // Make room
+ for (int i = _block.length() - 1; i >= n_pos; i--) {
+ _block.at_put_grow(i+1, _block.at(i));
+ }
+ for (int j = _node_info.length() - 1; j >= n_pos; j--) {
+ _node_info.at_put_grow(j+1, _node_info.at(j));
+ }
+ // Set value
+ _block.at_put_grow(n_pos, n);
+ _node_info.at_put_grow(n_pos, SWNodeInfo::initial);
+ // Adjust map from node->_idx to _block index
+ for (int i = n_pos; i < _block.length(); i++) {
+ set_bb_idx(_block.at(i), i);
+ }
+}
+
+//------------------------------compute_max_depth---------------------------
+// Compute max depth for expressions from beginning of block
+// Used to prune search paths during the test for independence.
+void SuperWord::compute_max_depth() {
+ int ct = 0;
+ bool again;
+ do {
+ again = false;
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ if (!n->is_Phi()) {
+ int d_orig = depth(n);
+ int d_in = 0;
+ for (DepPreds preds(n, _dg); !preds.done(); preds.next()) {
+ Node* pred = preds.current();
+ if (in_bb(pred)) {
+ d_in = MAX2(d_in, depth(pred));
+ }
+ }
+ if (d_in + 1 != d_orig) {
+ set_depth(n, d_in + 1);
+ again = true;
+ }
+ }
+ }
+ ct++;
+ } while (again);
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose)
+ tty->print_cr("compute_max_depth iterated: %d times", ct);
+#endif
+}
+
+//-------------------------compute_vector_element_type-----------------------
+// Compute necessary vector element type for expressions
+// This propagates backwards a narrower integer type when the
+// upper bits of the value are not needed.
+// Example: char a,b,c; a = b + c;
+// Normally the type of the add is integer, but for packed character
+// operations the type of the add needs to be char.
+void SuperWord::compute_vector_element_type() {
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose)
+ tty->print_cr("\ncompute_velt_type:");
+#endif
+
+ // Initial type
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ const Type* t = n->is_Mem() ? Type::get_const_basic_type(n->as_Mem()->memory_type())
+ : _igvn.type(n);
+ const Type* vt = container_type(t);
+ set_velt_type(n, vt);
+ }
+
+ // Propagate narrowed type backwards through operations
+ // that don't depend on higher order bits
+ for (int i = _block.length() - 1; i >= 0; i--) {
+ Node* n = _block.at(i);
+ // Only integer types need be examined
+ if (n->bottom_type()->isa_int()) {
+ uint start, end;
+ vector_opd_range(n, &start, &end);
+ const Type* vt = velt_type(n);
+
+ for (uint j = start; j < end; j++) {
+ Node* in = n->in(j);
+ // Don't propagate through a type conversion
+ if (n->bottom_type() != in->bottom_type())
+ continue;
+ switch(in->Opcode()) {
+ case Op_AddI: case Op_AddL:
+ case Op_SubI: case Op_SubL:
+ case Op_MulI: case Op_MulL:
+ case Op_AndI: case Op_AndL:
+ case Op_OrI: case Op_OrL:
+ case Op_XorI: case Op_XorL:
+ case Op_LShiftI: case Op_LShiftL:
+ case Op_CMoveI: case Op_CMoveL:
+ if (in_bb(in)) {
+ bool same_type = true;
+ for (DUIterator_Fast kmax, k = in->fast_outs(kmax); k < kmax; k++) {
+ Node *use = in->fast_out(k);
+ if (!in_bb(use) || velt_type(use) != vt) {
+ same_type = false;
+ break;
+ }
+ }
+ if (same_type) {
+ set_velt_type(in, vt);
+ }
+ }
+ }
+ }
+ }
+ }
+#ifndef PRODUCT
+ if (TraceSuperWord && Verbose) {
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ velt_type(n)->dump();
+ tty->print("\t");
+ n->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------memory_alignment---------------------------
+// Alignment within a vector memory reference
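+// For example, with a 16-byte vector width an adjusted offset of 20 bytes
+// gives alignment 4; a negative remainder is normalized into [0, vector width).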
+int SuperWord::memory_alignment(MemNode* s, int iv_adjust_in_bytes) {
+ SWPointer p(s, this);
+ if (!p.valid()) {
+ return bottom_align;
+ }
+ int offset = p.offset_in_bytes();
+ offset += iv_adjust_in_bytes;
+ int off_rem = offset % vector_width_in_bytes();
+ int off_mod = off_rem >= 0 ? off_rem : off_rem + vector_width_in_bytes();
+ return off_mod;
+}
+
+//---------------------------container_type---------------------------
+// Smallest type containing range of values
+const Type* SuperWord::container_type(const Type* t) {
+ if (t->isa_aryptr()) {
+ t = t->is_aryptr()->elem();
+ }
+ if (t->basic_type() == T_INT) {
+ if (t->higher_equal(TypeInt::BOOL)) return TypeInt::BOOL;
+ if (t->higher_equal(TypeInt::BYTE)) return TypeInt::BYTE;
+ if (t->higher_equal(TypeInt::CHAR)) return TypeInt::CHAR;
+ if (t->higher_equal(TypeInt::SHORT)) return TypeInt::SHORT;
+ return TypeInt::INT;
+ }
+ return t;
+}
+
+//-------------------------vector_opd_range-----------------------
+// [start, end) half-open range defining which operands are vector
+void SuperWord::vector_opd_range(Node* n, uint* start, uint* end) {
+ switch (n->Opcode()) {
+ case Op_LoadB: case Op_LoadC:
+ case Op_LoadI: case Op_LoadL:
+ case Op_LoadF: case Op_LoadD:
+ case Op_LoadP:
+ *start = 0;
+ *end = 0;
+ return;
+ case Op_StoreB: case Op_StoreC:
+ case Op_StoreI: case Op_StoreL:
+ case Op_StoreF: case Op_StoreD:
+ case Op_StoreP:
+ *start = MemNode::ValueIn;
+ *end = *start + 1;
+ return;
+ case Op_LShiftI: case Op_LShiftL:
+ *start = 1;
+ *end = 2;
+ return;
+ case Op_CMoveI: case Op_CMoveL: case Op_CMoveF: case Op_CMoveD:
+ *start = 2;
+ *end = n->req();
+ return;
+ }
+ *start = 1;
+ *end = n->req(); // default is all operands
+}
+
+//------------------------------in_packset---------------------------
+// Are s1 and s2 in a pack pair and ordered as s1,s2?
+bool SuperWord::in_packset(Node* s1, Node* s2) {
+ for (int i = 0; i < _packset.length(); i++) {
+ Node_List* p = _packset.at(i);
+ assert(p->size() == 2, "must be");
+ if (p->at(0) == s1 && p->at(p->size()-1) == s2) {
+ return true;
+ }
+ }
+ return false;
+}
+
+//------------------------------in_pack---------------------------
+// Is s in pack p?
+Node_List* SuperWord::in_pack(Node* s, Node_List* p) {
+ for (uint i = 0; i < p->size(); i++) {
+ if (p->at(i) == s) {
+ return p;
+ }
+ }
+ return NULL;
+}
+
+//------------------------------remove_pack_at---------------------------
+// Remove the pack at position pos in the packset
+void SuperWord::remove_pack_at(int pos) {
+ Node_List* p = _packset.at(pos);
+ for (uint i = 0; i < p->size(); i++) {
+ Node* s = p->at(i);
+ set_my_pack(s, NULL);
+ }
+ _packset.remove_at(pos);
+}
+
+//------------------------------executed_first---------------------------
+// Return the node executed first in pack p. Uses the RPO block list
+// to determine order.
+Node* SuperWord::executed_first(Node_List* p) {
+ Node* n = p->at(0);
+ int n_rpo = bb_idx(n);
+ for (uint i = 1; i < p->size(); i++) {
+ Node* s = p->at(i);
+ int s_rpo = bb_idx(s);
+ if (s_rpo < n_rpo) {
+ n = s;
+ n_rpo = s_rpo;
+ }
+ }
+ return n;
+}
+
+//------------------------------executed_last---------------------------
+// Return the node executed last in pack p.
+Node* SuperWord::executed_last(Node_List* p) {
+ Node* n = p->at(0);
+ int n_rpo = bb_idx(n);
+ for (uint i = 1; i < p->size(); i++) {
+ Node* s = p->at(i);
+ int s_rpo = bb_idx(s);
+ if (s_rpo > n_rpo) {
+ n = s;
+ n_rpo = s_rpo;
+ }
+ }
+ return n;
+}
+
+//----------------------------align_initial_loop_index---------------------------
+// Adjust pre-loop limit so that in main loop, a load/store reference
+// to align_to_ref will be at position zero in the vector.
+// (iv + k) mod vector_align == 0
+void SuperWord::align_initial_loop_index(MemNode* align_to_ref) {
+ CountedLoopNode *main_head = lp()->as_CountedLoop();
+ assert(main_head->is_main_loop(), "");
+ CountedLoopEndNode* pre_end = get_pre_loop_end(main_head);
+ assert(pre_end != NULL, "");
+ Node *pre_opaq1 = pre_end->limit();
+ assert(pre_opaq1->Opcode() == Op_Opaque1, "");
+ Opaque1Node *pre_opaq = (Opaque1Node*)pre_opaq1;
+ Node *pre_limit = pre_opaq->in(1);
+
+ // Where we put new limit calculations
+ Node *pre_ctrl = pre_end->loopnode()->in(LoopNode::EntryControl);
+
+ // Ensure the original loop limit is available from the
+ // pre-loop Opaque1 node.
+ Node *orig_limit = pre_opaq->original_loop_limit();
+ assert(orig_limit != NULL && _igvn.type(orig_limit) != Type::TOP, "");
+
+ SWPointer align_to_ref_p(align_to_ref, this);
+
+ // Let l0 == original pre_limit, l == new pre_limit, V == v_align
+ //
+ // For stride > 0
+ // Need l such that l > l0 && (l+k)%V == 0
+ // Find n such that l = (l0 + n)
+ // (l0 + n + k) % V == 0
+ // n = [V - (l0 + k)%V]%V
+ // new limit = l0 + [V - (l0 + k)%V]%V
+ // For stride < 0
+ // Need l such that l < l0 && (l+k)%V == 0
+ // Find n such that l = (l0 - n)
+ // (l0 - n + k) % V == 0
+ // n = (l0 + k)%V
+ // new limit = l0 - (l0 + k)%V
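+ //
+ // For example (stride > 0): with V == 4, k == 2, l0 == 7,
+ // n = [4 - (7 + 2)%4]%4 == 3, so the new limit is 10 and (10 + 2)%4 == 0.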
+
+ int elt_size = align_to_ref_p.memory_size();
+ int v_align = vector_width_in_bytes() / elt_size;
+ int k = align_to_ref_p.offset_in_bytes() / elt_size;
+
+ Node *kn = _igvn.intcon(k);
+ Node *limk = new (_phase->C, 3) AddINode(pre_limit, kn);
+ _phase->_igvn.register_new_node_with_optimizer(limk);
+ _phase->set_ctrl(limk, pre_ctrl);
+ if (align_to_ref_p.invar() != NULL) {
+ Node* log2_elt = _igvn.intcon(exact_log2(elt_size));
+ Node* aref = new (_phase->C, 3) URShiftINode(align_to_ref_p.invar(), log2_elt);
+ _phase->_igvn.register_new_node_with_optimizer(aref);
+ _phase->set_ctrl(aref, pre_ctrl);
+ if (!align_to_ref_p.negate_invar()) {
+ limk = new (_phase->C, 3) AddINode(limk, aref);
+ } else {
+ limk = new (_phase->C, 3) SubINode(limk, aref);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(limk);
+ _phase->set_ctrl(limk, pre_ctrl);
+ }
+ Node* va_msk = _igvn.intcon(v_align - 1);
+ Node* n = new (_phase->C, 3) AndINode(limk, va_msk);
+ _phase->_igvn.register_new_node_with_optimizer(n);
+ _phase->set_ctrl(n, pre_ctrl);
+ Node* newlim;
+ if (iv_stride() > 0) {
+ Node* va = _igvn.intcon(v_align);
+ Node* adj = new (_phase->C, 3) SubINode(va, n);
+ _phase->_igvn.register_new_node_with_optimizer(adj);
+ _phase->set_ctrl(adj, pre_ctrl);
+ Node* adj2 = new (_phase->C, 3) AndINode(adj, va_msk);
+ _phase->_igvn.register_new_node_with_optimizer(adj2);
+ _phase->set_ctrl(adj2, pre_ctrl);
+ newlim = new (_phase->C, 3) AddINode(pre_limit, adj2);
+ } else {
+ newlim = new (_phase->C, 3) SubINode(pre_limit, n);
+ }
+ _phase->_igvn.register_new_node_with_optimizer(newlim);
+ _phase->set_ctrl(newlim, pre_ctrl);
+ Node* constrained =
+ (iv_stride() > 0) ? (Node*) new (_phase->C,3) MinINode(newlim, orig_limit)
+ : (Node*) new (_phase->C,3) MaxINode(newlim, orig_limit);
+ _phase->_igvn.register_new_node_with_optimizer(constrained);
+ _phase->set_ctrl(constrained, pre_ctrl);
+ _igvn.hash_delete(pre_opaq);
+ pre_opaq->set_req(1, constrained);
+}
+
+//----------------------------get_pre_loop_end---------------------------
+// Find pre loop end from main loop. Returns null if none.
+CountedLoopEndNode* SuperWord::get_pre_loop_end(CountedLoopNode *cl) {
+ Node *ctrl = cl->in(LoopNode::EntryControl);
+ if (!ctrl->is_IfTrue() && !ctrl->is_IfFalse()) return NULL;
+ Node *iffm = ctrl->in(0);
+ if (!iffm->is_If()) return NULL;
+ Node *p_f = iffm->in(0);
+ if (!p_f->is_IfFalse()) return NULL;
+ if (!p_f->in(0)->is_CountedLoopEnd()) return NULL;
+ CountedLoopEndNode *pre_end = p_f->in(0)->as_CountedLoopEnd();
+ if (!pre_end->loopnode()->is_pre_loop()) return NULL;
+ return pre_end;
+}
+
+
+//------------------------------init---------------------------
+void SuperWord::init() {
+ _dg.init();
+ _packset.clear();
+ _disjoint_ptrs.clear();
+ _block.clear();
+ _data_entry.clear();
+ _mem_slice_head.clear();
+ _mem_slice_tail.clear();
+ _node_info.clear();
+ _align_to_ref = NULL;
+ _lpt = NULL;
+ _lp = NULL;
+ _bb = NULL;
+ _iv = NULL;
+}
+
+//------------------------------print_packset---------------------------
+void SuperWord::print_packset() {
+#ifndef PRODUCT
+ tty->print_cr("packset");
+ for (int i = 0; i < _packset.length(); i++) {
+ tty->print_cr("Pack: %d", i);
+ Node_List* p = _packset.at(i);
+ print_pack(p);
+ }
+#endif
+}
+
+//------------------------------print_pack---------------------------
+void SuperWord::print_pack(Node_List* p) {
+ for (uint i = 0; i < p->size(); i++) {
+ print_stmt(p->at(i));
+ }
+}
+
+//------------------------------print_bb---------------------------
+void SuperWord::print_bb() {
+#ifndef PRODUCT
+ tty->print_cr("\nBlock");
+ for (int i = 0; i < _block.length(); i++) {
+ Node* n = _block.at(i);
+ tty->print("%d ", i);
+ if (n) {
+ n->dump();
+ }
+ }
+#endif
+}
+
+//------------------------------print_stmt---------------------------
+void SuperWord::print_stmt(Node* s) {
+#ifndef PRODUCT
+ tty->print(" align: %d \t", alignment(s));
+ s->dump();
+#endif
+}
+
+//------------------------------blank---------------------------
+char* SuperWord::blank(uint depth) {
+ static char blanks[101];
+ assert(depth < 101, "too deep");
+ for (uint i = 0; i < depth; i++) blanks[i] = ' ';
+ blanks[depth] = '\0';
+ return blanks;
+}
+
+
+//==============================SWPointer===========================
+
+//----------------------------SWPointer------------------------
+SWPointer::SWPointer(MemNode* mem, SuperWord* slp) :
+ _mem(mem), _slp(slp), _base(NULL), _adr(NULL),
+ _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {
+
+ Node* adr = mem->in(MemNode::Address);
+ if (!adr->is_AddP()) {
+ assert(!valid(), "too complex");
+ return;
+ }
+ // Match AddP(base, AddP(ptr, k*iv [+ invariant]), constant)
+ Node* base = adr->in(AddPNode::Base);
+ for (int i = 0; i < 3; i++) {
+ if (!scaled_iv_plus_offset(adr->in(AddPNode::Offset))) {
+ assert(!valid(), "too complex");
+ return;
+ }
+ adr = adr->in(AddPNode::Address);
+ if (base == adr || !adr->is_AddP()) {
+ break; // stop looking at addp's
+ }
+ }
+ _base = base;
+ _adr = adr;
+ assert(valid(), "Usable");
+}
+
+// Following is used to create a temporary object during
+// the pattern match of an address expression.
+SWPointer::SWPointer(SWPointer* p) :
+ _mem(p->_mem), _slp(p->_slp), _base(NULL), _adr(NULL),
+ _scale(0), _offset(0), _invar(NULL), _negate_invar(false) {}
+
+//------------------------scaled_iv_plus_offset--------------------
+// Match: k*iv + offset
+// where: k is a constant that may be zero, and
+// offset is (k2 [+/- invariant]) where k2 may be zero and invariant is optional
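+// For example, the address term 4*iv + (12 + inv), with inv loop-invariant,
+// matches with _scale == 4, _offset == 12 and _invar == inv.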
+bool SWPointer::scaled_iv_plus_offset(Node* n) {
+ if (scaled_iv(n)) {
+ return true;
+ }
+ if (offset_plus_k(n)) {
+ return true;
+ }
+ int opc = n->Opcode();
+ if (opc == Op_AddI) {
+ if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2))) {
+ return true;
+ }
+ if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+ return true;
+ }
+ } else if (opc == Op_SubI) {
+ if (scaled_iv(n->in(1)) && offset_plus_k(n->in(2), true)) {
+ return true;
+ }
+ if (scaled_iv(n->in(2)) && offset_plus_k(n->in(1))) {
+ _scale *= -1;
+ return true;
+ }
+ }
+ return false;
+}
+
+//----------------------------scaled_iv------------------------
+// Match: k*iv where k is a constant that's not zero
+bool SWPointer::scaled_iv(Node* n) {
+ if (_scale != 0) {
+ return false; // already found a scale
+ }
+ if (n == iv()) {
+ _scale = 1;
+ return true;
+ }
+ int opc = n->Opcode();
+ if (opc == Op_MulI) {
+ if (n->in(1) == iv() && n->in(2)->is_Con()) {
+ _scale = n->in(2)->get_int();
+ return true;
+ } else if (n->in(2) == iv() && n->in(1)->is_Con()) {
+ _scale = n->in(1)->get_int();
+ return true;
+ }
+ } else if (opc == Op_LShiftI) {
+ if (n->in(1) == iv() && n->in(2)->is_Con()) {
+ _scale = 1 << n->in(2)->get_int();
+ return true;
+ }
+ } else if (opc == Op_ConvI2L) {
+ if (scaled_iv_plus_offset(n->in(1))) {
+ return true;
+ }
+ } else if (opc == Op_LShiftL) {
+ if (!has_iv() && _invar == NULL) {
+ // Need to preserve the current _offset value, so
+ // create a temporary object for this expression subtree.
+ // Hacky, so should re-engineer the address pattern match.
+ SWPointer tmp(this);
+ if (tmp.scaled_iv_plus_offset(n->in(1))) {
+ if (tmp._invar == NULL) {
+ int mult = 1 << n->in(2)->get_int();
+ _scale = tmp._scale * mult;
+ _offset += tmp._offset * mult;
+ return true;
+ }
+ }
+ }
+ }
+ return false;
+}
+
+//----------------------------offset_plus_k------------------------
+// Match: offset is (k [+/- invariant])
+// where k may be zero and invariant is optional, but not both.
+bool SWPointer::offset_plus_k(Node* n, bool negate) {
+ int opc = n->Opcode();
+ if (opc == Op_ConI) {
+ _offset += negate ? -(n->get_int()) : n->get_int();
+ return true;
+ } else if (opc == Op_ConL) {
+ // Okay if value fits into an int
+ const TypeLong* t = n->find_long_type();
+ if (t->higher_equal(TypeLong::INT)) {
+ jlong loff = n->get_long();
+ jint off = (jint)loff;
+ _offset += negate ? -off : off;
+ return true;
+ }
+ return false;
+ }
+ if (_invar != NULL) return false; // already have an invariant
+ if (opc == Op_AddI) {
+ if (n->in(2)->is_Con() && invariant(n->in(1))) {
+ _negate_invar = negate;
+ _invar = n->in(1);
+ _offset += negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+ return true;
+ } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
+ _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+ _negate_invar = negate;
+ _invar = n->in(2);
+ return true;
+ }
+ }
+ if (opc == Op_SubI) {
+ if (n->in(2)->is_Con() && invariant(n->in(1))) {
+ _negate_invar = negate;
+ _invar = n->in(1);
+ _offset += !negate ? -(n->in(2)->get_int()) : n->in(2)->get_int();
+ return true;
+ } else if (n->in(1)->is_Con() && invariant(n->in(2))) {
+ _offset += negate ? -(n->in(1)->get_int()) : n->in(1)->get_int();
+ _negate_invar = !negate;
+ _invar = n->in(2);
+ return true;
+ }
+ }
+ if (invariant(n)) {
+ _negate_invar = negate;
+ _invar = n;
+ return true;
+ }
+ return false;
+}
+
+//----------------------------print------------------------
+void SWPointer::print() {
+#ifndef PRODUCT
+ tty->print("base: %d adr: %d scale: %d offset: %d invar: %c%d\n",
+ _base != NULL ? _base->_idx : 0,
+ _adr != NULL ? _adr->_idx : 0,
+ _scale, _offset,
+ _negate_invar?'-':'+',
+ _invar != NULL ? _invar->_idx : 0);
+#endif
+}
+
+// ========================= OrderedPair =====================
+
+const OrderedPair OrderedPair::initial;
+
+// ========================= SWNodeInfo =====================
+
+const SWNodeInfo SWNodeInfo::initial;
+
+
+// ============================ DepGraph ===========================
+
+//------------------------------make_node---------------------------
+// Make a new dependence graph node for an ideal node.
+DepMem* DepGraph::make_node(Node* node) {
+ DepMem* m = new (_arena) DepMem(node);
+ if (node != NULL) {
+ assert(_map.at_grow(node->_idx) == NULL, "one init only");
+ _map.at_put_grow(node->_idx, m);
+ }
+ return m;
+}
+
+//------------------------------make_edge---------------------------
+// Make a new dependence graph edge from dpred -> dsucc
+DepEdge* DepGraph::make_edge(DepMem* dpred, DepMem* dsucc) {
+ DepEdge* e = new (_arena) DepEdge(dpred, dsucc, dsucc->in_head(), dpred->out_head());
+ dpred->set_out_head(e);
+ dsucc->set_in_head(e);
+ return e;
+}
+
+// ========================== DepMem ========================
+
+//------------------------------in_cnt---------------------------
+int DepMem::in_cnt() {
+ int ct = 0;
+ for (DepEdge* e = _in_head; e != NULL; e = e->next_in()) ct++;
+ return ct;
+}
+
+//------------------------------out_cnt---------------------------
+int DepMem::out_cnt() {
+ int ct = 0;
+ for (DepEdge* e = _out_head; e != NULL; e = e->next_out()) ct++;
+ return ct;
+}
+
+//------------------------------print-----------------------------
+void DepMem::print() {
+#ifndef PRODUCT
+ tty->print(" DepNode %d (", _node->_idx);
+ for (DepEdge* p = _in_head; p != NULL; p = p->next_in()) {
+ Node* pred = p->pred()->node();
+ tty->print(" %d", pred != NULL ? pred->_idx : 0);
+ }
+ tty->print(") [");
+ for (DepEdge* s = _out_head; s != NULL; s = s->next_out()) {
+ Node* succ = s->succ()->node();
+ tty->print(" %d", succ != NULL ? succ->_idx : 0);
+ }
+ tty->print_cr(" ]");
+#endif
+}
+
+// =========================== DepEdge =========================
+
+//------------------------------DepPreds---------------------------
+void DepEdge::print() {
+#ifndef PRODUCT
+ tty->print_cr("DepEdge: %d [ %d ]", _pred->node()->_idx, _succ->node()->_idx);
+#endif
+}
+
+// =========================== DepPreds =========================
+// Iterator over predecessor edges in the dependence graph.
+
+//------------------------------DepPreds---------------------------
+DepPreds::DepPreds(Node* n, DepGraph& dg) {
+ _n = n;
+ _done = false;
+ if (_n->is_Store() || _n->is_Load()) {
+ _next_idx = MemNode::Address;
+ _end_idx = n->req();
+ _dep_next = dg.dep(_n)->in_head();
+ } else if (_n->is_Mem()) {
+ _next_idx = 0;
+ _end_idx = 0;
+ _dep_next = dg.dep(_n)->in_head();
+ } else {
+ _next_idx = 1;
+ _end_idx = _n->req();
+ _dep_next = NULL;
+ }
+ next();
+}
+
+//------------------------------next---------------------------
+void DepPreds::next() {
+ if (_dep_next != NULL) {
+ _current = _dep_next->pred()->node();
+ _dep_next = _dep_next->next_in();
+ } else if (_next_idx < _end_idx) {
+ _current = _n->in(_next_idx++);
+ } else {
+ _done = true;
+ }
+}
+
+// =========================== DepSuccs =========================
+// Iterator over successor edges in the dependence graph.
+
+//------------------------------DepSuccs---------------------------
+DepSuccs::DepSuccs(Node* n, DepGraph& dg) {
+ _n = n;
+ _done = false;
+ if (_n->is_Load()) {
+ _next_idx = 0;
+ _end_idx = _n->outcnt();
+ _dep_next = dg.dep(_n)->out_head();
+ } else if (_n->is_Mem() || (_n->is_Phi() && _n->bottom_type() == Type::MEMORY)) {
+ _next_idx = 0;
+ _end_idx = 0;
+ _dep_next = dg.dep(_n)->out_head();
+ } else {
+ _next_idx = 0;
+ _end_idx = _n->outcnt();
+ _dep_next = NULL;
+ }
+ next();
+}
+
+//-------------------------------next---------------------------
+void DepSuccs::next() {
+ if (_dep_next != NULL) {
+ _current = _dep_next->succ()->node();
+ _dep_next = _dep_next->next_out();
+ } else if (_next_idx < _end_idx) {
+ _current = _n->raw_out(_next_idx++);
+ } else {
+ _done = true;
+ }
+}
diff --git a/src/share/vm/opto/superword.hpp b/src/share/vm/opto/superword.hpp
new file mode 100644
index 000000000..b60cc83c1
--- /dev/null
+++ b/src/share/vm/opto/superword.hpp
@@ -0,0 +1,506 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+//
+// S U P E R W O R D T R A N S F O R M
+//
+// SuperWords are short, fixed length vectors.
+//
+// Algorithm from:
+//
+// Exploiting SuperWord Level Parallelism with
+// Multimedia Instruction Sets
+// by
+// Samuel Larsen and Saman Amarasinghe
+// MIT Laboratory for Computer Science
+// date
+// May 2000
+// published in
+// ACM SIGPLAN Notices
+// Proceedings of ACM PLDI '00, Volume 35 Issue 5
+//
+// Definition 3.1 A Pack is an n-tuple, <s1, ...,sn>, where
+// s1,...,sn are independent isomorphic statements in a basic
+// block.
+//
+// Definition 3.2 A PackSet is a set of Packs.
+//
+// Definition 3.3 A Pair is a Pack of size two, where the
+// first statement is considered the left element, and the
+// second statement is considered the right element.
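+//
+// For example, after unrolling, the statements a[i] = b[i] + c[i] and
+// a[i+1] = b[i+1] + c[i+1] are independent and isomorphic, so the pair
+// <a[i] = b[i] + c[i], a[i+1] = b[i+1] + c[i+1]> is a Pack of size two.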
+
+class SWPointer;
+class OrderedPair;
+
+// ========================= Dependence Graph =====================
+
+class DepMem;
+
+//------------------------------DepEdge---------------------------
+// An edge in the dependence graph. The edges incident to a dependence
+// node are threaded through _next_in for incoming edges and _next_out
+// for outgoing edges.
+class DepEdge : public ResourceObj {
+ protected:
+ DepMem* _pred;
+ DepMem* _succ;
+ DepEdge* _next_in; // list of in edges, null terminated
+ DepEdge* _next_out; // list of out edges, null terminated
+
+ public:
+ DepEdge(DepMem* pred, DepMem* succ, DepEdge* next_in, DepEdge* next_out) :
+ _pred(pred), _succ(succ), _next_in(next_in), _next_out(next_out) {}
+
+ DepEdge* next_in() { return _next_in; }
+ DepEdge* next_out() { return _next_out; }
+ DepMem* pred() { return _pred; }
+ DepMem* succ() { return _succ; }
+
+ void print();
+};
+
+//------------------------------DepMem---------------------------
+// A node in the dependence graph. _in_head starts the threaded list of
+// incoming edges, and _out_head starts the list of outgoing edges.
+class DepMem : public ResourceObj {
+ protected:
+ Node* _node; // Corresponding ideal node
+ DepEdge* _in_head; // Head of list of in edges, null terminated
+ DepEdge* _out_head; // Head of list of out edges, null terminated
+
+ public:
+ DepMem(Node* node) : _node(node), _in_head(NULL), _out_head(NULL) {}
+
+ Node* node() { return _node; }
+ DepEdge* in_head() { return _in_head; }
+ DepEdge* out_head() { return _out_head; }
+ void set_in_head(DepEdge* hd) { _in_head = hd; }
+ void set_out_head(DepEdge* hd) { _out_head = hd; }
+
+ int in_cnt(); // Incoming edge count
+ int out_cnt(); // Outgoing edge count
+
+ void print();
+};
+
+//------------------------------DepGraph---------------------------
+class DepGraph VALUE_OBJ_CLASS_SPEC {
+ protected:
+ Arena* _arena;
+ GrowableArray<DepMem*> _map;
+ DepMem* _root;
+ DepMem* _tail;
+
+ public:
+ DepGraph(Arena* a) : _arena(a), _map(a, 8, 0, NULL) {
+ _root = new (_arena) DepMem(NULL);
+ _tail = new (_arena) DepMem(NULL);
+ }
+
+ DepMem* root() { return _root; }
+ DepMem* tail() { return _tail; }
+
+ // Return dependence node corresponding to an ideal node
+ DepMem* dep(Node* node) { return _map.at(node->_idx); }
+
+ // Make a new dependence graph node for an ideal node.
+ DepMem* make_node(Node* node);
+
+ // Make a new dependence graph edge dprec->dsucc
+ DepEdge* make_edge(DepMem* dpred, DepMem* dsucc);
+
+ DepEdge* make_edge(Node* pred, Node* succ) { return make_edge(dep(pred), dep(succ)); }
+ DepEdge* make_edge(DepMem* pred, Node* succ) { return make_edge(pred, dep(succ)); }
+ DepEdge* make_edge(Node* pred, DepMem* succ) { return make_edge(dep(pred), succ); }
+
+ void init() { _map.clear(); } // initialize
+
+ void print(Node* n) { dep(n)->print(); }
+ void print(DepMem* d) { d->print(); }
+};
+
+//------------------------------DepPreds---------------------------
+// Iterator over predecessors in the dependence graph and
+// non-memory-graph inputs of ideal nodes.
+class DepPreds : public StackObj {
+private:
+ Node* _n;
+ int _next_idx, _end_idx;
+ DepEdge* _dep_next;
+ Node* _current;
+ bool _done;
+
+public:
+ DepPreds(Node* n, DepGraph& dg);
+ Node* current() { return _current; }
+ bool done() { return _done; }
+ void next();
+};
+
+//------------------------------DepSuccs---------------------------
+// Iterator over successors in the dependence graph and
+// non-memory-graph outputs of ideal nodes.
+class DepSuccs : public StackObj {
+private:
+ Node* _n;
+ int _next_idx, _end_idx;
+ DepEdge* _dep_next;
+ Node* _current;
+ bool _done;
+
+public:
+ DepSuccs(Node* n, DepGraph& dg);
+ Node* current() { return _current; }
+ bool done() { return _done; }
+ void next();
+};
+
+
+// ========================= SuperWord =====================
+
+// -----------------------------SWNodeInfo---------------------------------
+// Per node info needed by SuperWord
+class SWNodeInfo VALUE_OBJ_CLASS_SPEC {
+ public:
+ int _alignment; // memory alignment for a node
+ int _depth; // Max expression (DAG) depth from block start
+ const Type* _velt_type; // vector element type
+ Node_List* _my_pack; // pack containing this node
+
+ SWNodeInfo() : _alignment(-1), _depth(0), _velt_type(NULL), _my_pack(NULL) {}
+ static const SWNodeInfo initial;
+};
+
+// -----------------------------SuperWord---------------------------------
+// Transforms scalar operations into packed (superword) operations.
+class SuperWord : public ResourceObj {
+ private:
+ PhaseIdealLoop* _phase;
+ Arena* _arena;
+ PhaseIterGVN &_igvn;
+
+ enum consts { top_align = -1, bottom_align = -666 };
+
+ GrowableArray<Node_List*> _packset; // Packs for the current block
+
+ GrowableArray<int> _bb_idx; // Map from Node _idx to index within block
+
+ GrowableArray<Node*> _block; // Nodes in current block
+ GrowableArray<Node*> _data_entry; // Nodes with all inputs from outside
+ GrowableArray<Node*> _mem_slice_head; // Memory slice head nodes
+ GrowableArray<Node*> _mem_slice_tail; // Memory slice tail nodes
+
+ GrowableArray<SWNodeInfo> _node_info; // Info needed per node
+
+ MemNode* _align_to_ref; // Memory reference that pre-loop will align to
+
+ GrowableArray<OrderedPair> _disjoint_ptrs; // runtime disambiguated pointer pairs
+
+ DepGraph _dg; // Dependence graph
+
+ // Scratch pads
+ VectorSet _visited; // Visited set
+ VectorSet _post_visited; // Post-visited set
+ Node_Stack _n_idx_list; // List of (node,index) pairs
+ GrowableArray<Node*> _nlist; // List of nodes
+ GrowableArray<Node*> _stk; // Stack of nodes
+
+ public:
+ SuperWord(PhaseIdealLoop* phase);
+
+ void transform_loop(IdealLoopTree* lpt);
+
+ // Accessors for SWPointer
+ PhaseIdealLoop* phase() { return _phase; }
+ IdealLoopTree* lpt() { return _lpt; }
+ PhiNode* iv() { return _iv; }
+
+ private:
+ IdealLoopTree* _lpt; // Current loop tree node
+ LoopNode* _lp; // Current LoopNode
+ Node* _bb; // Current basic block
+ PhiNode* _iv; // Induction var
+
+ // Accessors
+ Arena* arena() { return _arena; }
+
+ Node* bb() { return _bb; }
+ void set_bb(Node* bb) { _bb = bb; }
+
+ void set_lpt(IdealLoopTree* lpt) { _lpt = lpt; }
+
+ LoopNode* lp() { return _lp; }
+ void set_lp(LoopNode* lp) { _lp = lp;
+ _iv = lp->as_CountedLoop()->phi()->as_Phi(); }
+ int iv_stride() { return lp()->as_CountedLoop()->stride_con(); }
+
+ int vector_width_in_bytes() { return Matcher::vector_width_in_bytes(); }
+
+ MemNode* align_to_ref() { return _align_to_ref; }
+ void set_align_to_ref(MemNode* m) { _align_to_ref = m; }
+
+ Node* ctrl(Node* n) const { return _phase->has_ctrl(n) ? _phase->get_ctrl(n) : n; }
+
+ // block accessors
+ bool in_bb(Node* n) { return n != NULL && n->outcnt() > 0 && ctrl(n) == _bb; }
+ int bb_idx(Node* n) { assert(in_bb(n), "must be"); return _bb_idx.at(n->_idx); }
+ void set_bb_idx(Node* n, int i) { _bb_idx.at_put_grow(n->_idx, i); }
+
+ // visited set accessors
+ void visited_clear() { _visited.Clear(); }
+ void visited_set(Node* n) { return _visited.set(bb_idx(n)); }
+ int visited_test(Node* n) { return _visited.test(bb_idx(n)); }
+ int visited_test_set(Node* n) { return _visited.test_set(bb_idx(n)); }
+ void post_visited_clear() { _post_visited.Clear(); }
+ void post_visited_set(Node* n) { return _post_visited.set(bb_idx(n)); }
+ int post_visited_test(Node* n) { return _post_visited.test(bb_idx(n)); }
+
+ // Ensure node_info contains element "i"
+ void grow_node_info(int i) { if (i >= _node_info.length()) _node_info.at_put_grow(i, SWNodeInfo::initial); }
+
+ // memory alignment for a node
+ int alignment(Node* n) { return _node_info.adr_at(bb_idx(n))->_alignment; }
+ void set_alignment(Node* n, int a) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_alignment = a; }
+
+ // Max expression (DAG) depth from beginning of the block for each node
+ int depth(Node* n) { return _node_info.adr_at(bb_idx(n))->_depth; }
+ void set_depth(Node* n, int d) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_depth = d; }
+
+ // vector element type
+ const Type* velt_type(Node* n) { return _node_info.adr_at(bb_idx(n))->_velt_type; }
+ void set_velt_type(Node* n, const Type* t) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_velt_type = t; }
+
+ // my_pack
+ Node_List* my_pack(Node* n) { return !in_bb(n) ? NULL : _node_info.adr_at(bb_idx(n))->_my_pack; }
+ void set_my_pack(Node* n, Node_List* p) { int i = bb_idx(n); grow_node_info(i); _node_info.adr_at(i)->_my_pack = p; }
+
+ // methods
+
+ // Extract the superword level parallelism
+ void SLP_extract();
+ // Find the adjacent memory references and create pack pairs for them.
+ void find_adjacent_refs();
+ // Find a memory reference to align the loop induction variable to.
+ void find_align_to_ref(Node_List &memops);
+ // Can the preloop align the reference to position zero in the vector?
+ bool ref_is_alignable(SWPointer& p);
+ // Construct dependency graph.
+ void dependence_graph();
+ // Return a memory slice (node list) in predecessor order starting at "start"
+ void mem_slice_preds(Node* start, Node* stop, GrowableArray<Node*> &preds);
+ // Can s1 and s2 be in a pack with s1 immediately preceding s2 and s1 aligned at "align"?
+ bool stmts_can_pack(Node* s1, Node* s2, int align);
+ // Does s exist in a pack at position pos?
+ bool exists_at(Node* s, uint pos);
+ // Is s1 immediately before s2 in memory?
+ bool are_adjacent_refs(Node* s1, Node* s2);
+ // Are s1 and s2 similar?
+ bool isomorphic(Node* s1, Node* s2);
+ // Is there no data path from s1 to s2 or s2 to s1?
+ bool independent(Node* s1, Node* s2);
+ // Helper for independent
+ bool independent_path(Node* shallow, Node* deep, uint dp=0);
+ void set_alignment(Node* s1, Node* s2, int align);
+ int data_size(Node* s);
+ // Extend packset by following use->def and def->use links from pack members.
+ void extend_packlist();
+ // Extend the packset by visiting operand definitions of nodes in pack p
+ bool follow_use_defs(Node_List* p);
+ // Extend the packset by visiting uses of nodes in pack p
+ bool follow_def_uses(Node_List* p);
+ // Estimate the savings from executing s1 and s2 as a pack
+ int est_savings(Node* s1, Node* s2);
+ int adjacent_profit(Node* s1, Node* s2);
+ int pack_cost(int ct);
+ int unpack_cost(int ct);
+ // Combine packs A and B with A.last == B.first into A.first..,A.last,B.second,..B.last
+ void combine_packs();
+ // Construct the map from nodes to packs.
+ void construct_my_pack_map();
+ // Remove packs that are not implemented or not profitable.
+ void filter_packs();
+ // Adjust the memory graph for the packed operations
+ void schedule();
+ // Within a pack, move stores down to the last executed store,
+ // and move loads up to the first executed load.
+ void co_locate_pack(Node_List* p);
+ // Convert packs into vector node operations
+ void output();
+ // Create a vector operand for the nodes in pack p for operand: in(opd_idx)
+ VectorNode* vector_opd(Node_List* p, int opd_idx);
+ // Can code be generated for pack p?
+ bool implemented(Node_List* p);
+ // For pack p, are all operands and all uses (within the block) vector?
+ bool profitable(Node_List* p);
+ // If a use of pack p is not a vector use, then replace the use with an extract operation.
+ void insert_extracts(Node_List* p);
+ // Is use->in(u_idx) a vector use?
+ bool is_vector_use(Node* use, int u_idx);
+ // Construct reverse postorder list of block members
+ void construct_bb();
+ // Initialize per node info
+ void initialize_bb();
+ // Insert n into block after pos
+ void bb_insert_after(Node* n, int pos);
+ // Compute max depth for expressions from beginning of block
+ void compute_max_depth();
+ // Compute necessary vector element type for expressions
+ void compute_vector_element_type();
+ // Are s1 and s2 in a pack pair and ordered as s1,s2?
+ bool in_packset(Node* s1, Node* s2);
+ // Is s in pack p?
+ Node_List* in_pack(Node* s, Node_List* p);
+ // Remove the pack at position pos in the packset
+ void remove_pack_at(int pos);
+ // Return the node executed first in pack p.
+ Node* executed_first(Node_List* p);
+ // Return the node executed last in pack p.
+ Node* executed_last(Node_List* p);
+ // Alignment within a vector memory reference
+ int memory_alignment(MemNode* s, int iv_adjust_in_bytes);
+ // (Start, end] half-open range defining which operands are vector
+ void vector_opd_range(Node* n, uint* start, uint* end);
+ // Smallest type containing range of values
+ static const Type* container_type(const Type* t);
+ // Adjust pre-loop limit so that in main loop, a load/store reference
+ // to align_to_ref will be at position zero in the vector.
+ void align_initial_loop_index(MemNode* align_to_ref);
+ // Find the pre-loop end from the main loop. Returns NULL if none.
+ CountedLoopEndNode* get_pre_loop_end(CountedLoopNode *cl);
+ // Is the use of d1 in u1 at the same operand position as d2 in u2?
+ bool opnd_positions_match(Node* d1, Node* u1, Node* d2, Node* u2);
+ void init();
+
+ // print methods
+ void print_packset();
+ void print_pack(Node_List* p);
+ void print_bb();
+ void print_stmt(Node* s);
+ char* blank(uint depth);
+};
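+
+// The phases declared above presumably chain together along these lines (an
+// assumption inferred from the declaration order and comments here; the real
+// driver is SLP_extract(), whose definition is not part of this header):
+//
+//   construct_bb();                 // reverse postorder list of block members
+//   dependence_graph();             // memory dependences per slice
+//   compute_max_depth();
+//   compute_vector_element_type();
+//   find_adjacent_refs();           // seed the packset with adjacent memory pairs
+//   extend_packlist();              // grow packs along use->def and def->use links
+//   combine_packs();
+//   construct_my_pack_map();
+//   filter_packs();                 // keep only implemented() && profitable() packs
+//   schedule();                     // co_locate_pack() members in the memory graph
+//   output();                       // emit vector node operations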
+
+
+//------------------------------SWPointer---------------------------
+// Information about an address for dependence checking and vector alignment
+class SWPointer VALUE_OBJ_CLASS_SPEC {
+ protected:
+ MemNode* _mem; // My memory reference node
+ SuperWord* _slp; // SuperWord class
+
+ Node* _base; // NULL if unsafe nonheap reference
+ Node* _adr; // address pointer
+ jint _scale; // multiplier for iv (in bytes), 0 if no loop iv
+ jint _offset; // constant offset (in bytes)
+ Node* _invar; // invariant offset (in bytes), NULL if none
+ bool _negate_invar; // if true then use: (0 - _invar)
+
+ PhaseIdealLoop* phase() { return _slp->phase(); }
+ IdealLoopTree* lpt() { return _slp->lpt(); }
+ PhiNode* iv() { return _slp->iv(); } // Induction var
+
+ bool invariant(Node* n) {
+ Node *n_c = phase()->get_ctrl(n);
+ return !lpt()->is_member(phase()->get_loop(n_c));
+ }
+
+ // Match: k*iv + offset
+ bool scaled_iv_plus_offset(Node* n);
+ // Match: k*iv where k is a constant that's not zero
+ bool scaled_iv(Node* n);
+ // Match: offset is (k [+/- invariant])
+ bool offset_plus_k(Node* n, bool negate = false);
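+
+  // Illustrative decomposition (hypothetical loop, not taken from this code):
+  // for a store "a[i+3] = v" over a jint array inside a counted loop on i,
+  // the address breaks down roughly as
+  //   _base   = a                    (the array oop)
+  //   _scale  = 4                    (bytes per increment of the iv)
+  //   _offset = 4*3 + <array element base offset in bytes>
+  //   _invar  = NULL                 (no loop-invariant, non-constant term)
+  // An index like "i + m" with m defined outside the loop would instead leave
+  // the byte-scaled node for m in _invar (negated when _negate_invar is set).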
+
+ public:
+ enum CMP {
+ Less = 1,
+ Greater = 2,
+ Equal = 4,
+ NotEqual = (Less | Greater),
+ NotComparable = (Less | Greater | Equal)
+ };
+
+ SWPointer(MemNode* mem, SuperWord* slp);
+ // Following is used to create a temporary object during
+ // the pattern match of an address expression.
+ SWPointer(SWPointer* p);
+
+ bool valid() { return _adr != NULL; }
+ bool has_iv() { return _scale != 0; }
+
+ Node* base() { return _base; }
+ Node* adr() { return _adr; }
+ int scale_in_bytes() { return _scale; }
+ Node* invar() { return _invar; }
+ bool negate_invar() { return _negate_invar; }
+ int offset_in_bytes() { return _offset; }
+ int memory_size() { return _mem->memory_size(); }
+
+ // Comparable?
+ int cmp(SWPointer& q) {
+ if (valid() && q.valid() &&
+ (_adr == q._adr || (_base == _adr && q._base == q._adr)) &&
+ _scale == q._scale &&
+ _invar == q._invar &&
+ _negate_invar == q._negate_invar) {
+ bool overlap = q._offset < _offset + memory_size() &&
+ _offset < q._offset + q.memory_size();
+ return overlap ? Equal : (_offset < q._offset ? Less : Greater);
+ } else {
+ return NotComparable;
+ }
+ }
+
+ bool not_equal(SWPointer& q) { return not_equal(cmp(q)); }
+ bool equal(SWPointer& q) { return equal(cmp(q)); }
+ bool comparable(SWPointer& q) { return comparable(cmp(q)); }
+ static bool not_equal(int cmp) { return cmp <= NotEqual; }
+ static bool equal(int cmp) { return cmp == Equal; }
+ static bool comparable(int cmp) { return cmp < NotComparable; }
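+
+  // Worked example with hypothetical values: two 8-byte references that agree
+  // on base, scale and invariant, at offsets 0 and 8, do not overlap
+  // (8 < 0+8 fails), so cmp() returns Less -- the first precedes the second.
+  // With offsets 0 and 4 the byte ranges [0,8) and [4,12) overlap and cmp()
+  // returns Equal.  References that disagree on base, scale or invariant are
+  // NotComparable and cannot be ordered statically.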
+
+ void print();
+};
+
+
+//------------------------------OrderedPair---------------------------
+// Ordered pair of Node*.
+class OrderedPair VALUE_OBJ_CLASS_SPEC {
+ protected:
+ Node* _p1;
+ Node* _p2;
+ public:
+ OrderedPair() : _p1(NULL), _p2(NULL) {}
+ OrderedPair(Node* p1, Node* p2) {
+ if (p1->_idx < p2->_idx) {
+ _p1 = p1; _p2 = p2;
+ } else {
+ _p1 = p2; _p2 = p1;
+ }
+ }
+
+ bool operator==(const OrderedPair &rhs) {
+ return _p1 == rhs._p1 && _p2 == rhs._p2;
+ }
+ void print() { tty->print(" (%d, %d)", _p1->_idx, _p2->_idx); }
+
+ static const OrderedPair initial;
+};
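+
+// For example, OrderedPair(n, m) and OrderedPair(m, n) compare equal, because
+// the constructor always stores the node with the smaller _idx in _p1.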
diff --git a/src/share/vm/opto/type.cpp b/src/share/vm/opto/type.cpp
new file mode 100644
index 000000000..e396c9732
--- /dev/null
+++ b/src/share/vm/opto/type.cpp
@@ -0,0 +1,3751 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+#include "incls/_precompiled.incl"
+#include "incls/_type.cpp.incl"
+
+// Dictionary of types shared among compilations.
+Dict* Type::_shared_type_dict = NULL;
+
+// Array which maps compiler types to Basic Types
+const BasicType Type::_basic_type[Type::lastype] = {
+ T_ILLEGAL, // Bad
+ T_ILLEGAL, // Control
+ T_VOID, // Top
+ T_INT, // Int
+ T_LONG, // Long
+ T_VOID, // Half
+
+ T_ILLEGAL, // Tuple
+ T_ARRAY, // Array
+
+ T_ADDRESS, // AnyPtr // shows up in factory methods for NULL_PTR
+ T_ADDRESS, // RawPtr
+ T_OBJECT, // OopPtr
+ T_OBJECT, // InstPtr
+ T_OBJECT, // AryPtr
+ T_OBJECT, // KlassPtr
+
+ T_OBJECT, // Function
+ T_ILLEGAL, // Abio
+ T_ADDRESS, // Return_Address
+ T_ILLEGAL, // Memory
+ T_FLOAT, // FloatTop
+ T_FLOAT, // FloatCon
+ T_FLOAT, // FloatBot
+ T_DOUBLE, // DoubleTop
+ T_DOUBLE, // DoubleCon
+ T_DOUBLE, // DoubleBot
+ T_ILLEGAL, // Bottom
+};
+
+// Map ideal registers (machine types) to ideal types
+const Type *Type::mreg2type[_last_machine_leaf];
+
+// Map basic types to canonical Type* pointers.
+const Type* Type:: _const_basic_type[T_CONFLICT+1];
+
+// Map basic types to constant-zero Types.
+const Type* Type:: _zero_type[T_CONFLICT+1];
+
+// Map basic types to array-body alias types.
+const TypeAryPtr* TypeAryPtr::_array_body_type[T_CONFLICT+1];
+
+//=============================================================================
+// Convenience common pre-built types.
+const Type *Type::ABIO; // State-of-machine only
+const Type *Type::BOTTOM; // All values
+const Type *Type::CONTROL; // Control only
+const Type *Type::DOUBLE; // All doubles
+const Type *Type::FLOAT; // All floats
+const Type *Type::HALF; // Placeholder half of doublewide type
+const Type *Type::MEMORY; // Abstract store only
+const Type *Type::RETURN_ADDRESS;
+const Type *Type::TOP; // No values in set
+
+//------------------------------get_const_type---------------------------
+const Type* Type::get_const_type(ciType* type) {
+ if (type == NULL) {
+ return NULL;
+ } else if (type->is_primitive_type()) {
+ return get_const_basic_type(type->basic_type());
+ } else {
+ return TypeOopPtr::make_from_klass(type->as_klass());
+ }
+}
+
+//---------------------------array_element_basic_type---------------------------------
+// Mapping to the array element's basic type.
+BasicType Type::array_element_basic_type() const {
+ BasicType bt = basic_type();
+ if (bt == T_INT) {
+ if (this == TypeInt::INT) return T_INT;
+ if (this == TypeInt::CHAR) return T_CHAR;
+ if (this == TypeInt::BYTE) return T_BYTE;
+ if (this == TypeInt::BOOL) return T_BOOLEAN;
+ if (this == TypeInt::SHORT) return T_SHORT;
+ return T_VOID;
+ }
+ return bt;
+}
+
+//---------------------------get_typeflow_type---------------------------------
+// Import a type produced by ciTypeFlow.
+const Type* Type::get_typeflow_type(ciType* type) {
+ switch (type->basic_type()) {
+
+ case ciTypeFlow::StateVector::T_BOTTOM:
+ assert(type == ciTypeFlow::StateVector::bottom_type(), "");
+ return Type::BOTTOM;
+
+ case ciTypeFlow::StateVector::T_TOP:
+ assert(type == ciTypeFlow::StateVector::top_type(), "");
+ return Type::TOP;
+
+ case ciTypeFlow::StateVector::T_NULL:
+ assert(type == ciTypeFlow::StateVector::null_type(), "");
+ return TypePtr::NULL_PTR;
+
+ case ciTypeFlow::StateVector::T_LONG2:
+ // The ciTypeFlow pass pushes a long, then the half.
+ // We do the same.
+ assert(type == ciTypeFlow::StateVector::long2_type(), "");
+ return TypeInt::TOP;
+
+ case ciTypeFlow::StateVector::T_DOUBLE2:
+ // The ciTypeFlow pass pushes double, then the half.
+ // Our convention is the same.
+ assert(type == ciTypeFlow::StateVector::double2_type(), "");
+ return Type::TOP;
+
+ case T_ADDRESS:
+ assert(type->is_return_address(), "");
+ return TypeRawPtr::make((address)(intptr_t)type->as_return_address()->bci());
+
+ default:
+ // make sure we did not mix up the cases:
+ assert(type != ciTypeFlow::StateVector::bottom_type(), "");
+ assert(type != ciTypeFlow::StateVector::top_type(), "");
+ assert(type != ciTypeFlow::StateVector::null_type(), "");
+ assert(type != ciTypeFlow::StateVector::long2_type(), "");
+ assert(type != ciTypeFlow::StateVector::double2_type(), "");
+ assert(!type->is_return_address(), "");
+
+ return Type::get_const_type(type);
+ }
+}
+
+
+//------------------------------make-------------------------------------------
+// Create a simple Type, with default empty symbol sets. Then hashcons it
+// and look for an existing copy in the type dictionary.
+const Type *Type::make( enum TYPES t ) {
+ return (new Type(t))->hashcons();
+}
+
+//------------------------------cmp--------------------------------------------
+int Type::cmp( const Type *const t1, const Type *const t2 ) {
+ if( t1->_base != t2->_base )
+ return 1; // Missed badly
+ assert(t1 != t2 || t1->eq(t2), "eq must be reflexive");
+ return !t1->eq(t2); // Return ZERO if equal
+}
+
+//------------------------------hash-------------------------------------------
+int Type::uhash( const Type *const t ) {
+ return t->hash();
+}
+
+//--------------------------Initialize_shared----------------------------------
+void Type::Initialize_shared(Compile* current) {
+ // This method does not need to be locked because the first system
+ // compilations (stub compilations) occur serially. If they are
+ // changed to proceed in parallel, then this section will need
+ // locking.
+
+ Arena* save = current->type_arena();
+ Arena* shared_type_arena = new Arena();
+
+ current->set_type_arena(shared_type_arena);
+ _shared_type_dict =
+ new (shared_type_arena) Dict( (CmpKey)Type::cmp, (Hash)Type::uhash,
+ shared_type_arena, 128 );
+ current->set_type_dict(_shared_type_dict);
+
+ // Make shared pre-built types.
+ CONTROL = make(Control); // Control only
+ TOP = make(Top); // No values in set
+ MEMORY = make(Memory); // Abstract store only
+ ABIO = make(Abio); // State-of-machine only
+ RETURN_ADDRESS=make(Return_Address);
+ FLOAT = make(FloatBot); // All floats
+ DOUBLE = make(DoubleBot); // All doubles
+ BOTTOM = make(Bottom); // Everything
+ HALF = make(Half); // Placeholder half of doublewide type
+
+ TypeF::ZERO = TypeF::make(0.0); // Float 0 (positive zero)
+ TypeF::ONE = TypeF::make(1.0); // Float 1
+
+ TypeD::ZERO = TypeD::make(0.0); // Double 0 (positive zero)
+ TypeD::ONE = TypeD::make(1.0); // Double 1
+
+ TypeInt::MINUS_1 = TypeInt::make(-1); // -1
+ TypeInt::ZERO = TypeInt::make( 0); // 0
+ TypeInt::ONE = TypeInt::make( 1); // 1
+ TypeInt::BOOL = TypeInt::make(0,1, WidenMin); // 0 or 1, FALSE or TRUE.
+ TypeInt::CC = TypeInt::make(-1, 1, WidenMin); // -1, 0 or 1, condition codes
+ TypeInt::CC_LT = TypeInt::make(-1,-1, WidenMin); // == TypeInt::MINUS_1
+ TypeInt::CC_GT = TypeInt::make( 1, 1, WidenMin); // == TypeInt::ONE
+ TypeInt::CC_EQ = TypeInt::make( 0, 0, WidenMin); // == TypeInt::ZERO
+ TypeInt::CC_LE = TypeInt::make(-1, 0, WidenMin);
+ TypeInt::CC_GE = TypeInt::make( 0, 1, WidenMin); // == TypeInt::BOOL
+ TypeInt::BYTE = TypeInt::make(-128,127, WidenMin); // Bytes
+ TypeInt::CHAR = TypeInt::make(0,65535, WidenMin); // Java chars
+ TypeInt::SHORT = TypeInt::make(-32768,32767, WidenMin); // Java shorts
+ TypeInt::POS = TypeInt::make(0,max_jint, WidenMin); // Non-neg values
+ TypeInt::POS1 = TypeInt::make(1,max_jint, WidenMin); // Positive values
+ TypeInt::INT = TypeInt::make(min_jint,max_jint, WidenMax); // 32-bit integers
+ TypeInt::SYMINT = TypeInt::make(-max_jint,max_jint,WidenMin); // symmetric range
+ // CmpL is overloaded both as the bytecode computation returning
+ // a trinary (-1,0,+1) integer result AND as an efficient long
+ // compare returning optimizer ideal-type flags.
+ assert( TypeInt::CC_LT == TypeInt::MINUS_1, "types must match for CmpL to work" );
+ assert( TypeInt::CC_GT == TypeInt::ONE, "types must match for CmpL to work" );
+ assert( TypeInt::CC_EQ == TypeInt::ZERO, "types must match for CmpL to work" );
+ assert( TypeInt::CC_GE == TypeInt::BOOL, "types must match for CmpL to work" );
+
+ TypeLong::MINUS_1 = TypeLong::make(-1); // -1
+ TypeLong::ZERO = TypeLong::make( 0); // 0
+ TypeLong::ONE = TypeLong::make( 1); // 1
+ TypeLong::POS = TypeLong::make(0,max_jlong, WidenMin); // Non-neg values
+ TypeLong::LONG = TypeLong::make(min_jlong,max_jlong,WidenMax); // 64-bit integers
+ TypeLong::INT = TypeLong::make((jlong)min_jint,(jlong)max_jint,WidenMin);
+ TypeLong::UINT = TypeLong::make(0,(jlong)max_juint,WidenMin);
+
+ const Type **fboth =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fboth[0] = Type::CONTROL;
+ fboth[1] = Type::CONTROL;
+ TypeTuple::IFBOTH = TypeTuple::make( 2, fboth );
+
+ const Type **ffalse =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ ffalse[0] = Type::CONTROL;
+ ffalse[1] = Type::TOP;
+ TypeTuple::IFFALSE = TypeTuple::make( 2, ffalse );
+
+ const Type **fneither =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fneither[0] = Type::TOP;
+ fneither[1] = Type::TOP;
+ TypeTuple::IFNEITHER = TypeTuple::make( 2, fneither );
+
+ const Type **ftrue =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ ftrue[0] = Type::TOP;
+ ftrue[1] = Type::CONTROL;
+ TypeTuple::IFTRUE = TypeTuple::make( 2, ftrue );
+
+ const Type **floop =(const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ floop[0] = Type::CONTROL;
+ floop[1] = TypeInt::INT;
+ TypeTuple::LOOPBODY = TypeTuple::make( 2, floop );
+
+ TypePtr::NULL_PTR= TypePtr::make( AnyPtr, TypePtr::Null, 0 );
+ TypePtr::NOTNULL = TypePtr::make( AnyPtr, TypePtr::NotNull, OffsetBot );
+ TypePtr::BOTTOM = TypePtr::make( AnyPtr, TypePtr::BotPTR, OffsetBot );
+
+ TypeRawPtr::BOTTOM = TypeRawPtr::make( TypePtr::BotPTR );
+ TypeRawPtr::NOTNULL= TypeRawPtr::make( TypePtr::NotNull );
+
+ mreg2type[Op_Node] = Type::BOTTOM;
+ mreg2type[Op_Set ] = 0;
+ mreg2type[Op_RegI] = TypeInt::INT;
+ mreg2type[Op_RegP] = TypePtr::BOTTOM;
+ mreg2type[Op_RegF] = Type::FLOAT;
+ mreg2type[Op_RegD] = Type::DOUBLE;
+ mreg2type[Op_RegL] = TypeLong::LONG;
+ mreg2type[Op_RegFlags] = TypeInt::CC;
+
+ const Type **fmembar = TypeTuple::fields(0);
+ TypeTuple::MEMBAR = TypeTuple::make(TypeFunc::Parms+0, fmembar);
+
+ const Type **fsc = (const Type**)shared_type_arena->Amalloc_4(2*sizeof(Type*));
+ fsc[0] = TypeInt::CC;
+ fsc[1] = Type::MEMORY;
+ TypeTuple::STORECONDITIONAL = TypeTuple::make(2, fsc);
+
+ TypeInstPtr::NOTNULL = TypeInstPtr::make(TypePtr::NotNull, current->env()->Object_klass());
+ TypeInstPtr::BOTTOM = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass());
+ TypeInstPtr::MIRROR = TypeInstPtr::make(TypePtr::NotNull, current->env()->Class_klass());
+ TypeInstPtr::MARK = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass(),
+ false, 0, oopDesc::mark_offset_in_bytes());
+ TypeInstPtr::KLASS = TypeInstPtr::make(TypePtr::BotPTR, current->env()->Object_klass(),
+ false, 0, oopDesc::klass_offset_in_bytes());
+ TypeOopPtr::BOTTOM = TypeOopPtr::make(TypePtr::BotPTR, OffsetBot);
+
+ TypeAryPtr::RANGE = TypeAryPtr::make( TypePtr::BotPTR, TypeAry::make(Type::BOTTOM,TypeInt::POS), current->env()->Object_klass(), false, arrayOopDesc::length_offset_in_bytes());
+ // There is no shared klass for Object[]. See note in TypeAryPtr::klass().
+ TypeAryPtr::OOPS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInstPtr::BOTTOM,TypeInt::POS), NULL /*ciArrayKlass::make(o)*/, false, Type::OffsetBot);
+ TypeAryPtr::BYTES = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::BYTE ,TypeInt::POS), ciTypeArrayKlass::make(T_BYTE), true, Type::OffsetBot);
+ TypeAryPtr::SHORTS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::SHORT ,TypeInt::POS), ciTypeArrayKlass::make(T_SHORT), true, Type::OffsetBot);
+ TypeAryPtr::CHARS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::CHAR ,TypeInt::POS), ciTypeArrayKlass::make(T_CHAR), true, Type::OffsetBot);
+ TypeAryPtr::INTS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeInt::INT ,TypeInt::POS), ciTypeArrayKlass::make(T_INT), true, Type::OffsetBot);
+ TypeAryPtr::LONGS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(TypeLong::LONG ,TypeInt::POS), ciTypeArrayKlass::make(T_LONG), true, Type::OffsetBot);
+ TypeAryPtr::FLOATS = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(Type::FLOAT ,TypeInt::POS), ciTypeArrayKlass::make(T_FLOAT), true, Type::OffsetBot);
+ TypeAryPtr::DOUBLES = TypeAryPtr::make(TypePtr::BotPTR, TypeAry::make(Type::DOUBLE ,TypeInt::POS), ciTypeArrayKlass::make(T_DOUBLE), true, Type::OffsetBot);
+
+ TypeAryPtr::_array_body_type[T_OBJECT] = TypeAryPtr::OOPS;
+ TypeAryPtr::_array_body_type[T_ARRAY] = TypeAryPtr::OOPS; // arrays are stored in oop arrays
+ TypeAryPtr::_array_body_type[T_BYTE] = TypeAryPtr::BYTES;
+ TypeAryPtr::_array_body_type[T_BOOLEAN] = TypeAryPtr::BYTES; // boolean[] is a byte array
+ TypeAryPtr::_array_body_type[T_SHORT] = TypeAryPtr::SHORTS;
+ TypeAryPtr::_array_body_type[T_CHAR] = TypeAryPtr::CHARS;
+ TypeAryPtr::_array_body_type[T_INT] = TypeAryPtr::INTS;
+ TypeAryPtr::_array_body_type[T_LONG] = TypeAryPtr::LONGS;
+ TypeAryPtr::_array_body_type[T_FLOAT] = TypeAryPtr::FLOATS;
+ TypeAryPtr::_array_body_type[T_DOUBLE] = TypeAryPtr::DOUBLES;
+
+ TypeKlassPtr::OBJECT = TypeKlassPtr::make( TypePtr::NotNull, current->env()->Object_klass(), 0 );
+ TypeKlassPtr::OBJECT_OR_NULL = TypeKlassPtr::make( TypePtr::BotPTR, current->env()->Object_klass(), 0 );
+
+ const Type **fi2c = TypeTuple::fields(2);
+ fi2c[TypeFunc::Parms+0] = TypeInstPtr::BOTTOM; // methodOop
+ fi2c[TypeFunc::Parms+1] = TypeRawPtr::BOTTOM; // argument pointer
+ TypeTuple::START_I2C = TypeTuple::make(TypeFunc::Parms+2, fi2c);
+
+ const Type **intpair = TypeTuple::fields(2);
+ intpair[0] = TypeInt::INT;
+ intpair[1] = TypeInt::INT;
+ TypeTuple::INT_PAIR = TypeTuple::make(2, intpair);
+
+ const Type **longpair = TypeTuple::fields(2);
+ longpair[0] = TypeLong::LONG;
+ longpair[1] = TypeLong::LONG;
+ TypeTuple::LONG_PAIR = TypeTuple::make(2, longpair);
+
+ _const_basic_type[T_BOOLEAN] = TypeInt::BOOL;
+ _const_basic_type[T_CHAR] = TypeInt::CHAR;
+ _const_basic_type[T_BYTE] = TypeInt::BYTE;
+ _const_basic_type[T_SHORT] = TypeInt::SHORT;
+ _const_basic_type[T_INT] = TypeInt::INT;
+ _const_basic_type[T_LONG] = TypeLong::LONG;
+ _const_basic_type[T_FLOAT] = Type::FLOAT;
+ _const_basic_type[T_DOUBLE] = Type::DOUBLE;
+ _const_basic_type[T_OBJECT] = TypeInstPtr::BOTTOM;
+ _const_basic_type[T_ARRAY] = TypeInstPtr::BOTTOM; // there is no separate bottom for arrays
+ _const_basic_type[T_VOID] = TypePtr::NULL_PTR; // reflection represents void this way
+ _const_basic_type[T_ADDRESS] = TypeRawPtr::BOTTOM; // both interpreter return addresses & random raw ptrs
+ _const_basic_type[T_CONFLICT]= Type::BOTTOM; // why not?
+
+ _zero_type[T_BOOLEAN] = TypeInt::ZERO; // false == 0
+ _zero_type[T_CHAR] = TypeInt::ZERO; // '\0' == 0
+ _zero_type[T_BYTE] = TypeInt::ZERO; // 0x00 == 0
+ _zero_type[T_SHORT] = TypeInt::ZERO; // 0x0000 == 0
+ _zero_type[T_INT] = TypeInt::ZERO;
+ _zero_type[T_LONG] = TypeLong::ZERO;
+ _zero_type[T_FLOAT] = TypeF::ZERO;
+ _zero_type[T_DOUBLE] = TypeD::ZERO;
+ _zero_type[T_OBJECT] = TypePtr::NULL_PTR;
+ _zero_type[T_ARRAY] = TypePtr::NULL_PTR; // null array is null oop
+ _zero_type[T_ADDRESS] = TypePtr::NULL_PTR; // raw pointers use the same null
+ _zero_type[T_VOID] = Type::TOP; // the only void value is no value at all
+
+ // get_zero_type() should not happen for T_CONFLICT
+ _zero_type[T_CONFLICT]= NULL;
+
+ // Restore working type arena.
+ current->set_type_arena(save);
+ current->set_type_dict(NULL);
+}
+
+//------------------------------Initialize-------------------------------------
+void Type::Initialize(Compile* current) {
+ assert(current->type_arena() != NULL, "must have created type arena");
+
+ if (_shared_type_dict == NULL) {
+ Initialize_shared(current);
+ }
+
+ Arena* type_arena = current->type_arena();
+
+ // Create the hash-cons'ing dictionary with top-level storage allocation
+ Dict *tdic = new (type_arena) Dict( (CmpKey)Type::cmp,(Hash)Type::uhash, type_arena, 128 );
+ current->set_type_dict(tdic);
+
+ // Transfer the shared types.
+ DictI i(_shared_type_dict);
+ for( ; i.test(); ++i ) {
+ Type* t = (Type*)i._value;
+ tdic->Insert(t,t); // New Type, insert into Type table
+ }
+}
+
+//------------------------------hashcons---------------------------------------
+// Do the hash-cons trick. If the Type already exists in the type table,
+// delete the current Type and return the existing Type. Otherwise stick the
+// current Type in the Type table.
+const Type *Type::hashcons(void) {
+ debug_only(base()); // Check the assertion in Type::base().
+ // Look up the Type in the Type dictionary
+ Dict *tdic = type_dict();
+ Type* old = (Type*)(tdic->Insert(this, this, false));
+ if( old ) { // Pre-existing Type?
+ if( old != this ) // Yes, this guy is not the pre-existing?
+ delete this; // Yes, Nuke this guy
+ assert( old->_dual, "" );
+ return old; // Return pre-existing
+ }
+
+ // Every type has a dual (to make my lattice symmetric).
+ // Since we just discovered a new Type, compute its dual right now.
+ assert( !_dual, "" ); // No dual yet
+ _dual = xdual(); // Compute the dual
+ if( cmp(this,_dual)==0 ) { // Handle self-symmetric
+ _dual = this;
+ return this;
+ }
+ assert( !_dual->_dual, "" ); // No reverse dual yet
+ assert( !(*tdic)[_dual], "" ); // Dual not in type system either
+ // New Type, insert into Type table
+ tdic->Insert((void*)_dual,(void*)_dual);
+ ((Type*)_dual)->_dual = this; // Finish up being symmetric
+#ifdef ASSERT
+ Type *dual_dual = (Type*)_dual->xdual();
+ assert( eq(dual_dual), "xdual(xdual()) should be identity" );
+ delete dual_dual;
+#endif
+ return this; // Return new Type
+}
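+
+// A consequence of hash-consing (illustrative): structurally equal types are
+// interned, so they can be compared by pointer, e.g.
+//   const TypeInt* a = TypeInt::make(5);
+//   const TypeInt* b = TypeInt::make(5);
+//   assert(a == b, "equal types share one interned representation");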
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool Type::eq( const Type * ) const {
+ return true; // Nothing else can go wrong
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int Type::hash(void) const {
+ return _base;
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool Type::is_finite() const {
+ return false;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool Type::is_nan() const {
+ return false;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. NOT virtual. It enforces that meet is
+// commutative and the lattice is symmetric.
+const Type *Type::meet( const Type *t ) const {
+ const Type *mt = xmeet(t);
+#ifdef ASSERT
+ assert( mt == t->xmeet(this), "meet not commutative" );
+ const Type* dual_join = mt->_dual;
+ const Type *t2t = dual_join->xmeet(t->_dual);
+ const Type *t2this = dual_join->xmeet( _dual);
+
+ // Interface meet Oop is Not Symmetric:
+ // Interface:AnyNull meet Oop:AnyNull == Interface:AnyNull
+ // Interface:NotNull meet Oop:NotNull == java/lang/Object:NotNull
+ const TypeInstPtr* this_inst = this->isa_instptr();
+ const TypeInstPtr* t_inst = t->isa_instptr();
+ bool interface_vs_oop = false;
+ if( this_inst && this_inst->is_loaded() && t_inst && t_inst->is_loaded() ) {
+ bool this_interface = this_inst->klass()->is_interface();
+ bool t_interface = t_inst->klass()->is_interface();
+ interface_vs_oop = this_interface ^ t_interface;
+ }
+ const Type *tdual = t->_dual;
+ const Type *thisdual = _dual;
+ // strip out instances
+ if (t2t->isa_oopptr() != NULL) {
+ t2t = t2t->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (t2this->isa_oopptr() != NULL) {
+ t2this = t2this->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (tdual->isa_oopptr() != NULL) {
+ tdual = tdual->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+ if (thisdual->isa_oopptr() != NULL) {
+ thisdual = thisdual->isa_oopptr()->cast_to_instance(TypeOopPtr::UNKNOWN_INSTANCE);
+ }
+
+ if( !interface_vs_oop && (t2t != tdual || t2this != thisdual) ) {
+ tty->print_cr("=== Meet Not Symmetric ===");
+ tty->print("t = "); t->dump(); tty->cr();
+ tty->print("this= "); dump(); tty->cr();
+ tty->print("mt=(t meet this)= "); mt->dump(); tty->cr();
+
+ tty->print("t_dual= "); t->_dual->dump(); tty->cr();
+ tty->print("this_dual= "); _dual->dump(); tty->cr();
+ tty->print("mt_dual= "); mt->_dual->dump(); tty->cr();
+
+ tty->print("mt_dual meet t_dual= "); t2t ->dump(); tty->cr();
+ tty->print("mt_dual meet this_dual= "); t2this ->dump(); tty->cr();
+
+ fatal("meet not symmetric" );
+ }
+#endif
+ return mt;
+}
+
+//------------------------------xmeet------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *Type::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Meeting TOP with anything?
+ if( _base == Top ) return t;
+
+ // Meeting BOTTOM with anything?
+ if( _base == Bottom ) return BOTTOM;
+
+ // Current "this->_base" is one of: Bad, Multi, Control, Top,
+ // Abio, Abstore, Floatxxx, Doublexxx, Bottom, lastype.
+ switch (t->base()) { // Switch on original type
+
+ // Cut in half the number of cases I must handle. Only need cases for when
+ // the given enum "t->type" is less than or equal to the local enum "type".
+ case FloatCon:
+ case DoubleCon:
+ case Int:
+ case Long:
+ return t->xmeet(this);
+
+ case OopPtr:
+ return t->xmeet(this);
+
+ case InstPtr:
+ return t->xmeet(this);
+
+ case KlassPtr:
+ return t->xmeet(this);
+
+ case AryPtr:
+ return t->xmeet(this);
+
+ case Bad: // Type check
+ default: // Bogus type not in lattice
+ typerr(t);
+ return Type::BOTTOM;
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ case FloatTop:
+ if( _base == FloatTop ) return this;
+ case FloatBot: // Float
+ if( _base == FloatBot || _base == FloatTop ) return FLOAT;
+ if( _base == DoubleTop || _base == DoubleBot ) return Type::BOTTOM;
+ typerr(t);
+ return Type::BOTTOM;
+
+ case DoubleTop:
+ if( _base == DoubleTop ) return this;
+ case DoubleBot: // Double
+ if( _base == DoubleBot || _base == DoubleTop ) return DOUBLE;
+ if( _base == FloatTop || _base == FloatBot ) return Type::BOTTOM;
+ typerr(t);
+ return Type::BOTTOM;
+
+ // These next few cases must match exactly or it is a compile-time error.
+ case Control: // Control of code
+ case Abio: // State of world outside of program
+ case Memory:
+ if( _base == t->_base ) return this;
+ typerr(t);
+ return Type::BOTTOM;
+
+ case Top: // Top of the lattice
+ return this;
+ }
+
+ // The type is unchanged
+ return this;
+}
+
+//-----------------------------filter------------------------------------------
+const Type *Type::filter( const Type *kills ) const {
+ const Type* ft = join(kills);
+ if (ft->empty())
+ return Type::TOP; // Canonical empty value
+ return ft;
+}
+
+//------------------------------xdual------------------------------------------
+// Compute dual right now.
+const Type::TYPES Type::dual_type[Type::lastype] = {
+ Bad, // Bad
+ Control, // Control
+ Bottom, // Top
+ Bad, // Int - handled in v-call
+ Bad, // Long - handled in v-call
+ Half, // Half
+
+ Bad, // Tuple - handled in v-call
+ Bad, // Array - handled in v-call
+
+ Bad, // AnyPtr - handled in v-call
+ Bad, // RawPtr - handled in v-call
+ Bad, // OopPtr - handled in v-call
+ Bad, // InstPtr - handled in v-call
+ Bad, // AryPtr - handled in v-call
+ Bad, // KlassPtr - handled in v-call
+
+ Bad, // Function - handled in v-call
+ Abio, // Abio
+ Return_Address,// Return_Address
+ Memory, // Memory
+ FloatBot, // FloatTop
+ FloatCon, // FloatCon
+ FloatTop, // FloatBot
+ DoubleBot, // DoubleTop
+ DoubleCon, // DoubleCon
+ DoubleTop, // DoubleBot
+ Top // Bottom
+};
+
+const Type *Type::xdual() const {
+ // Note: the base() accessor asserts the sanity of _base.
+ assert(dual_type[base()] != Bad, "implement with v-call");
+ return new Type(dual_type[_base]);
+}
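+
+// Examples from the table above: Top and Bottom are each other's duals,
+// FloatTop <-> FloatBot and DoubleTop <-> DoubleBot swap, and self-symmetric
+// entries such as Control, Half, Abio, Memory, FloatCon and DoubleCon map to
+// themselves, so xdual(xdual(t)) always gets back to t (as hashcons asserts).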
+
+//------------------------------has_memory-------------------------------------
+bool Type::has_memory() const {
+ Type::TYPES tx = base();
+ if (tx == Memory) return true;
+ if (tx == Tuple) {
+ const TypeTuple *t = is_tuple();
+ for (uint i=0; i < t->cnt(); i++) {
+ tx = t->field_at(i)->base();
+ if (tx == Memory) return true;
+ }
+ }
+ return false;
+}
+
+#ifndef PRODUCT
+//------------------------------dump2------------------------------------------
+void Type::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print(msg[_base]);
+}
+
+//------------------------------dump-------------------------------------------
+void Type::dump_on(outputStream *st) const {
+ ResourceMark rm;
+ Dict d(cmpkey,hashkey); // Stop recursive type dumping
+ dump2(d,1, st);
+}
+
+//------------------------------data-------------------------------------------
+const char * const Type::msg[Type::lastype] = {
+ "bad","control","top","int:","long:","half",
+ "tuple:", "aryptr",
+ "anyptr:", "rawptr:", "java:", "inst:", "ary:", "klass:",
+ "func", "abIO", "return_address", "memory",
+ "float_top", "ftcon:", "float",
+ "double_top", "dblcon:", "double",
+ "bottom"
+};
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants.
+bool Type::singleton(void) const {
+ return _base == Top || _base == Half;
+}
+
+//------------------------------empty------------------------------------------
+// TRUE if Type is a type with no values, FALSE otherwise.
+bool Type::empty(void) const {
+ switch (_base) {
+ case DoubleTop:
+ case FloatTop:
+ case Top:
+ return true;
+
+ case Half:
+ case Abio:
+ case Return_Address:
+ case Memory:
+ case Bottom:
+ case FloatBot:
+ case DoubleBot:
+ return false; // never a singleton, therefore never empty
+ }
+
+ ShouldNotReachHere();
+ return false;
+}
+
+//------------------------------dump_stats-------------------------------------
+// Dump collected statistics to stderr
+#ifndef PRODUCT
+void Type::dump_stats() {
+ tty->print("Types made: %d\n", type_dict()->Size());
+}
+#endif
+
+//------------------------------typerr-----------------------------------------
+void Type::typerr( const Type *t ) const {
+#ifndef PRODUCT
+ tty->print("\nError mixing types: ");
+ dump();
+ tty->print(" and ");
+ t->dump();
+ tty->print("\n");
+#endif
+ ShouldNotReachHere();
+}
+
+//------------------------------isa_oop_ptr------------------------------------
+// Return true if type is an oop pointer type. False for raw pointers.
+static char isa_oop_ptr_tbl[Type::lastype] = {
+ 0,0,0,0,0,0,0/*tuple*/, 0/*ary*/,
+ 0/*anyptr*/,0/*rawptr*/,1/*OopPtr*/,1/*InstPtr*/,1/*AryPtr*/,1/*KlassPtr*/,
+ 0/*func*/,0,0/*return_address*/,0,
+ /*floats*/0,0,0, /*doubles*/0,0,0,
+ 0
+};
+bool Type::isa_oop_ptr() const {
+ return isa_oop_ptr_tbl[_base] != 0;
+}
+
+//------------------------------verify_lastype---------------------------------
+// Check that arrays match type enum
+#ifndef PRODUCT
+void Type::verify_lastype() {
+ // Check that arrays match enumeration
+ assert( Type::dual_type [Type::lastype - 1] == Type::Top, "did not update array");
+ assert( strcmp(Type::msg [Type::lastype - 1],"bottom") == 0, "did not update array");
+ // assert( PhiNode::tbl [Type::lastype - 1] == NULL, "did not update array");
+ assert( Matcher::base2reg[Type::lastype - 1] == 0, "did not update array");
+ assert( isa_oop_ptr_tbl [Type::lastype - 1] == (char)0, "did not update array");
+}
+#endif
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeF *TypeF::ZERO; // Floating point zero
+const TypeF *TypeF::ONE; // Floating point one
+
+//------------------------------make-------------------------------------------
+// Create a float constant
+const TypeF *TypeF::make(float f) {
+ return (TypeF*)(new TypeF(f))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeF::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is FloatCon
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case Long:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+
+ case FloatBot:
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case FloatCon: // Float-constant vs Float-constant?
+ if( jint_cast(_f) != jint_cast(t->getf()) ) // unequal constants?
+ // must compare bitwise as positive zero, negative zero and NaN have
+ // all the same representation in C++
+ return FLOAT; // Return generic float
+ // Equal constants
+ case Top:
+ case FloatTop:
+ break; // Return the float constant
+ }
+ return this; // Return the float constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: symmetric
+const Type *TypeF::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeF::eq( const Type *t ) const {
+ if( g_isnan(_f) ||
+ g_isnan(t->getf()) ) {
+ // One or both are NANs. If both are NANs return true, else false.
+ return (g_isnan(_f) && g_isnan(t->getf()));
+ }
+ if (_f == t->getf()) {
+ // (NaN is impossible at this point, since it is not equal even to itself)
+ if (_f == 0.0) {
+ // difference between positive and negative zero
+ if (jint_cast(_f) != jint_cast(t->getf())) return false;
+ }
+ return true;
+ }
+ return false;
+}
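+
+// Consequences of the bitwise checks above (illustrative): TypeF::make(0.0f)
+// and TypeF::make(-0.0f) are distinct type constants even though 0.0f == -0.0f
+// as a float comparison, because their bit patterns differ; conversely a NaN
+// constant is considered eq() to another NaN, even though NaN != NaN in C++.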
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeF::hash(void) const {
+ return *(int*)(&_f);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeF::is_finite() const {
+ return g_isfinite(getf()) != 0;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool TypeF::is_nan() const {
+ return g_isnan(getf()) != 0;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump float constant Type
+#ifndef PRODUCT
+void TypeF::dump2( Dict &d, uint depth, outputStream *st ) const {
+ Type::dump2(d,depth, st);
+ st->print("%f", _f);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeF::singleton(void) const {
+ return true; // Always a singleton
+}
+
+bool TypeF::empty(void) const {
+ return false; // always exactly a singleton
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeD *TypeD::ZERO; // Floating point zero
+const TypeD *TypeD::ONE; // Floating point one
+
+//------------------------------make-------------------------------------------
+const TypeD *TypeD::make(double d) {
+ return (TypeD*)(new TypeD(d))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeD::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is DoubleCon
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+
+ case DoubleBot:
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case DoubleCon: // Double-constant vs Double-constant?
+ if( jlong_cast(_d) != jlong_cast(t->getd()) ) // unequal constants? (see comment in TypeF::xmeet)
+ return DOUBLE; // Return generic double
+ case Top:
+ case DoubleTop:
+ break;
+ }
+ return this; // Return the double constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: symmetric
+const Type *TypeD::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeD::eq( const Type *t ) const {
+ if( g_isnan(_d) ||
+ g_isnan(t->getd()) ) {
+ // One or both are NANs. If both are NANs return true, else false.
+ return (g_isnan(_d) && g_isnan(t->getd()));
+ }
+ if (_d == t->getd()) {
+ // (NaN is impossible at this point, since it is not equal even to itself)
+ if (_d == 0.0) {
+ // difference between positive and negative zero
+ if (jlong_cast(_d) != jlong_cast(t->getd())) return false;
+ }
+ return true;
+ }
+ return false;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeD::hash(void) const {
+ return *(int*)(&_d);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeD::is_finite() const {
+ return g_isfinite(getd()) != 0;
+}
+
+//------------------------------is_nan-----------------------------------------
+// Is not a number (NaN)
+bool TypeD::is_nan() const {
+ return g_isnan(getd()) != 0;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump double constant Type
+#ifndef PRODUCT
+void TypeD::dump2( Dict &d, uint depth, outputStream *st ) const {
+ Type::dump2(d,depth,st);
+ st->print("%f", _d);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeD::singleton(void) const {
+ return true; // Always a singleton
+}
+
+bool TypeD::empty(void) const {
+ return false; // always exactly a singleton
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeInt *TypeInt::MINUS_1;// -1
+const TypeInt *TypeInt::ZERO; // 0
+const TypeInt *TypeInt::ONE; // 1
+const TypeInt *TypeInt::BOOL; // 0 or 1, FALSE or TRUE.
+const TypeInt *TypeInt::CC; // -1,0 or 1, condition codes
+const TypeInt *TypeInt::CC_LT; // [-1] == MINUS_1
+const TypeInt *TypeInt::CC_GT; // [1] == ONE
+const TypeInt *TypeInt::CC_EQ; // [0] == ZERO
+const TypeInt *TypeInt::CC_LE; // [-1,0]
+const TypeInt *TypeInt::CC_GE; // [0,1] == BOOL (!)
+const TypeInt *TypeInt::BYTE; // Bytes, -128 to 127
+const TypeInt *TypeInt::CHAR; // Java chars, 0-65535
+const TypeInt *TypeInt::SHORT; // Java shorts, -32768-32767
+const TypeInt *TypeInt::POS; // Positive 32-bit integers or zero
+const TypeInt *TypeInt::POS1; // Positive 32-bit integers
+const TypeInt *TypeInt::INT; // 32-bit integers
+const TypeInt *TypeInt::SYMINT; // symmetric range [-max_jint..max_jint]
+
+//------------------------------TypeInt----------------------------------------
+TypeInt::TypeInt( jint lo, jint hi, int w ) : Type(Int), _lo(lo), _hi(hi), _widen(w) {
+}
+
+//------------------------------make-------------------------------------------
+const TypeInt *TypeInt::make( jint lo ) {
+ return (TypeInt*)(new TypeInt(lo,lo,WidenMin))->hashcons();
+}
+
+#define SMALLINT ((juint)3) // a value too insignificant to consider widening
+
+const TypeInt *TypeInt::make( jint lo, jint hi, int w ) {
+ // Certain normalizations keep us sane when comparing types.
+ // The 'SMALLINT' covers constants and also CC and its relatives.
+ assert(CC == NULL || (juint)(CC->_hi - CC->_lo) <= SMALLINT, "CC is truly small");
+ if (lo <= hi) {
+ if ((juint)(hi - lo) <= SMALLINT) w = Type::WidenMin;
+ if ((juint)(hi - lo) >= max_juint) w = Type::WidenMax; // plain int
+ }
+ return (TypeInt*)(new TypeInt(lo,hi,w))->hashcons();
+}
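+
+// Normalization examples (illustrative):
+//   TypeInt::make(3, 5, WidenMax)                -- hi-lo == 2 <= SMALLINT, so
+//                                                   widen is forced to WidenMin;
+//   TypeInt::make(min_jint, max_jint, WidenMin)  -- covers the whole range, so
+//                                                   it is forced to WidenMax (plain int).
+// This keeps trivially small and trivially full ranges canonical, independent
+// of the widen state they arrive with, so hashcons() can unify them.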
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type representation object
+// with reference count equal to the number of Types pointing at it.
+// Caller should wrap a Types around it.
+const Type *TypeInt::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type?
+
+ // Currently "this->_base" is a TypeInt
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ default: // All else is a mistake
+ typerr(t);
+ case Top: // No change
+ return this;
+ case Int: // Int vs Int?
+ break;
+ }
+
+ // Expand covered set
+ const TypeInt *r = t->is_int();
+ // (Avoid TypeInt::make, to avoid the argument normalizations it enforces.)
+ return (new TypeInt( MIN2(_lo,r->_lo), MAX2(_hi,r->_hi), MAX2(_widen,r->_widen) ))->hashcons();
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: reverse hi & lo; flip widen
+const Type *TypeInt::xdual() const {
+ return new TypeInt(_hi,_lo,WidenMax-_widen);
+}
+
+//------------------------------widen------------------------------------------
+// Only happens for optimistic top-down optimizations.
+const Type *TypeInt::widen( const Type *old ) const {
+ // Coming from TOP or such; no widening
+ if( old->base() != Int ) return this;
+ const TypeInt *ot = old->is_int();
+
+ // If new guy is equal to old guy, no widening
+ if( _lo == ot->_lo && _hi == ot->_hi )
+ return old;
+
+ // If new guy contains old, then we widened
+ if( _lo <= ot->_lo && _hi >= ot->_hi ) {
+ // New contains old
+ // If new guy is already wider than old, no widening
+ if( _widen > ot->_widen ) return this;
+ // If old guy was a constant, do not bother
+ if (ot->_lo == ot->_hi) return this;
+ // Now widen new guy.
+ // Check for widening too far
+ if (_widen == WidenMax) {
+ if (min_jint < _lo && _hi < max_jint) {
+ // If neither endpoint is extremal yet, push out the endpoint
+ // which is closer to its respective limit.
+ if (_lo >= 0 || // easy common case
+ (juint)(_lo - min_jint) >= (juint)(max_jint - _hi)) {
+ // Try to widen to an unsigned range type of 31 bits:
+ return make(_lo, max_jint, WidenMax);
+ } else {
+ return make(min_jint, _hi, WidenMax);
+ }
+ }
+ return TypeInt::INT;
+ }
+ // Returned widened new guy
+ return make(_lo,_hi,_widen+1);
+ }
+
+ // If old guy contains new, then we probably widened too far & dropped to
+ // bottom. Return the wider fellow.
+ if ( ot->_lo <= _lo && ot->_hi >= _hi )
+ return old;
+
+ //fatal("Integer value range is not subset");
+ //return this;
+ return TypeInt::INT;
+}
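+
+// Illustrative widening sequence (hypothetical ranges): if optimistic rounds
+// produce [0,10], then [0,11], then [0,12], ..., each step that still contains
+// the previous range keeps the exact new bounds but bumps _widen by one.  Once
+// _widen reaches WidenMax the range is pushed straight to an extremal bound --
+// here [0,max_jint] -- or to TypeInt::INT, so iteration terminates instead of
+// creeping up one value at a time.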
+
+//------------------------------narrow---------------------------------------
+// Only happens for pessimistic optimizations.
+const Type *TypeInt::narrow( const Type *old ) const {
+ if (_lo >= _hi) return this; // already narrow enough
+ if (old == NULL) return this;
+ const TypeInt* ot = old->isa_int();
+ if (ot == NULL) return this;
+ jint olo = ot->_lo;
+ jint ohi = ot->_hi;
+
+ // If new guy is equal to old guy, no narrowing
+ if (_lo == olo && _hi == ohi) return old;
+
+ // If old guy was maximum range, allow the narrowing
+ if (olo == min_jint && ohi == max_jint) return this;
+
+ if (_lo < olo || _hi > ohi)
+ return this; // doesn't narrow; pretty weird
+
+ // The new type narrows the old type, so look for a "death march".
+ // See comments on PhaseTransform::saturate.
+ juint nrange = _hi - _lo;
+ juint orange = ohi - olo;
+ if (nrange < max_juint - 1 && nrange > (orange >> 1) + (SMALLINT*2)) {
+ // Use the new type only if the range shrinks a lot.
+ // We do not want the optimizer computing 2^31 point by point.
+ return old;
+ }
+
+ return this;
+}
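+
+// Illustrative "death march" check (hypothetical ranges): narrowing from the
+// full int range down to [0,100] is accepted, but narrowing [0,1000000] to
+// [0,999999] is not -- the new width (999999) is still more than half the old
+// width plus 2*SMALLINT, so 'old' is returned rather than letting the
+// optimizer shrink the interval one value per pass.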
+
+//-----------------------------filter------------------------------------------
+const Type *TypeInt::filter( const Type *kills ) const {
+ const TypeInt* ft = join(kills)->isa_int();
+ if (ft == NULL || ft->_lo > ft->_hi)
+ return Type::TOP; // Canonical empty value
+ if (ft->_widen < this->_widen) {
+ // Do not allow the value of kill->_widen to affect the outcome.
+ // The widen bits must be allowed to run freely through the graph.
+ ft = TypeInt::make(ft->_lo, ft->_hi, this->_widen);
+ }
+ return ft;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeInt::eq( const Type *t ) const {
+ const TypeInt *r = t->is_int(); // Handy access
+ return r->_lo == _lo && r->_hi == _hi && r->_widen == _widen;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeInt::hash(void) const {
+ return _lo+_hi+_widen+(int)Type::Int;
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeInt::is_finite() const {
+ return true;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump TypeInt
+#ifndef PRODUCT
+static const char* intname(char* buf, jint n) {
+ if (n == min_jint)
+ return "min";
+ else if (n < min_jint + 10000)
+ sprintf(buf, "min+" INT32_FORMAT, n - min_jint);
+ else if (n == max_jint)
+ return "max";
+ else if (n > max_jint - 10000)
+ sprintf(buf, "max-" INT32_FORMAT, max_jint - n);
+ else
+ sprintf(buf, INT32_FORMAT, n);
+ return buf;
+}
+
+void TypeInt::dump2( Dict &d, uint depth, outputStream *st ) const {
+ char buf[40], buf2[40];
+ if (_lo == min_jint && _hi == max_jint)
+ st->print("int");
+ else if (is_con())
+ st->print("int:%s", intname(buf, get_con()));
+ else if (_lo == BOOL->_lo && _hi == BOOL->_hi)
+ st->print("bool");
+ else if (_lo == BYTE->_lo && _hi == BYTE->_hi)
+ st->print("byte");
+ else if (_lo == CHAR->_lo && _hi == CHAR->_hi)
+ st->print("char");
+ else if (_lo == SHORT->_lo && _hi == SHORT->_hi)
+ st->print("short");
+ else if (_hi == max_jint)
+ st->print("int:>=%s", intname(buf, _lo));
+ else if (_lo == min_jint)
+ st->print("int:<=%s", intname(buf, _hi));
+ else
+ st->print("int:%s..%s", intname(buf, _lo), intname(buf2, _hi));
+
+ if (_widen != 0 && this != TypeInt::INT)
+ st->print(":%.*s", _widen, "wwww");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants.
+bool TypeInt::singleton(void) const {
+ return _lo >= _hi;
+}
+
+bool TypeInt::empty(void) const {
+ return _lo > _hi;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeLong *TypeLong::MINUS_1;// -1
+const TypeLong *TypeLong::ZERO; // 0
+const TypeLong *TypeLong::ONE; // 1
+const TypeLong *TypeLong::POS; // >=0
+const TypeLong *TypeLong::LONG; // 64-bit integers
+const TypeLong *TypeLong::INT; // 32-bit subrange
+const TypeLong *TypeLong::UINT; // 32-bit unsigned subrange
+
+//------------------------------TypeLong---------------------------------------
+TypeLong::TypeLong( jlong lo, jlong hi, int w ) : Type(Long), _lo(lo), _hi(hi), _widen(w) {
+}
+
+//------------------------------make-------------------------------------------
+const TypeLong *TypeLong::make( jlong lo ) {
+ return (TypeLong*)(new TypeLong(lo,lo,WidenMin))->hashcons();
+}
+
+const TypeLong *TypeLong::make( jlong lo, jlong hi, int w ) {
+ // Certain normalizations keep us sane when comparing types.
+ // The 'SMALLINT' covers constants.
+ if (lo <= hi) {
+ if ((julong)(hi - lo) <= SMALLINT) w = Type::WidenMin;
+ if ((julong)(hi - lo) >= max_julong) w = Type::WidenMax; // plain long
+ }
+ return (TypeLong*)(new TypeLong(lo,hi,w))->hashcons();
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type representation object
+// with reference count equal to the number of Types pointing at it.
+// Caller should wrap a Types around it.
+const Type *TypeLong::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type?
+
+ // Currently "this->_base" is a TypeLong
+ switch (t->base()) { // Switch on original type
+ case AnyPtr: // Mixing with oops happens when javac
+ case RawPtr: // reuses local variables
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ case Int:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ default: // All else is a mistake
+ typerr(t);
+ case Top: // No change
+ return this;
+ case Long: // Long vs Long?
+ break;
+ }
+
+ // Expand covered set
+ const TypeLong *r = t->is_long(); // Turn into a TypeLong
+ // (Avoid TypeLong::make, to avoid the argument normalizations it enforces.)
+ return (new TypeLong( MIN2(_lo,r->_lo), MAX2(_hi,r->_hi), MAX2(_widen,r->_widen) ))->hashcons();
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: reverse hi & lo; flip widen
+const Type *TypeLong::xdual() const {
+ return new TypeLong(_hi,_lo,WidenMax-_widen);
+}
+
+//------------------------------widen------------------------------------------
+// Only happens for optimistic top-down optimizations.
+const Type *TypeLong::widen( const Type *old ) const {
+ // Coming from TOP or such; no widening
+ if( old->base() != Long ) return this;
+ const TypeLong *ot = old->is_long();
+
+ // If new guy is equal to old guy, no widening
+ if( _lo == ot->_lo && _hi == ot->_hi )
+ return old;
+
+ // If new guy contains old, then we widened
+ if( _lo <= ot->_lo && _hi >= ot->_hi ) {
+ // New contains old
+ // If new guy is already wider than old, no widening
+ if( _widen > ot->_widen ) return this;
+ // If old guy was a constant, do not bother
+ if (ot->_lo == ot->_hi) return this;
+ // Now widen new guy.
+ // Check for widening too far
+ if (_widen == WidenMax) {
+ if (min_jlong < _lo && _hi < max_jlong) {
+ // If neither endpoint is extremal yet, push out the endpoint
+ // which is closer to its respective limit.
+ if (_lo >= 0 || // easy common case
+ (julong)(_lo - min_jlong) >= (julong)(max_jlong - _hi)) {
+ // Try to widen to an unsigned range type of 32/63 bits:
+ if (_hi < max_juint)
+ return make(_lo, max_juint, WidenMax);
+ else
+ return make(_lo, max_jlong, WidenMax);
+ } else {
+ return make(min_jlong, _hi, WidenMax);
+ }
+ }
+ return TypeLong::LONG;
+ }
+ // Returned widened new guy
+ return make(_lo,_hi,_widen+1);
+ }
+
+ // If old guy contains new, then we probably widened too far & dropped to
+ // bottom. Return the wider fellow.
+ if ( ot->_lo <= _lo && ot->_hi >= _hi )
+ return old;
+
+ // fatal("Long value range is not subset");
+ // return this;
+ return TypeLong::LONG;
+}
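+
+// Note on the widening policy above: as long as the new range keeps creeping
+// outward, only the _widen counter is bumped (make(_lo,_hi,_widen+1)); once
+// it reaches WidenMax the range is pushed out to an extremal bound
+// (max_juint, max_jlong or min_jlong) or falls to TypeLong::LONG, which is
+// what keeps iterative analysis from growing a range one value at a time.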
+
+//------------------------------narrow----------------------------------------
+// Only happens for pessimistic optimizations.
+const Type *TypeLong::narrow( const Type *old ) const {
+ if (_lo >= _hi) return this; // already narrow enough
+ if (old == NULL) return this;
+ const TypeLong* ot = old->isa_long();
+ if (ot == NULL) return this;
+ jlong olo = ot->_lo;
+ jlong ohi = ot->_hi;
+
+ // If new guy is equal to old guy, no narrowing
+ if (_lo == olo && _hi == ohi) return old;
+
+ // If old guy was maximum range, allow the narrowing
+ if (olo == min_jlong && ohi == max_jlong) return this;
+
+ if (_lo < olo || _hi > ohi)
+ return this; // doesn't narrow; pretty weird
+
+ // The new type narrows the old type, so look for a "death march".
+ // See comments on PhaseTransform::saturate.
+ julong nrange = _hi - _lo;
+ julong orange = ohi - olo;
+ if (nrange < max_julong - 1 && nrange > (orange >> 1) + (SMALLINT*2)) {
+ // Use the new type only if the range shrinks a lot.
+ // We do not want the optimizer computing 2^31 point by point.
+ return old;
+ }
+
+ return this;
+}
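+
+// Note: the guard above only accepts the narrower type when the range
+// shrinks to (roughly) half of the old range or less; otherwise the old
+// type is kept, so the optimizer cannot be dragged through a "death march"
+// that tightens a huge range one point at a time.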
+
+//-----------------------------filter------------------------------------------
+const Type *TypeLong::filter( const Type *kills ) const {
+ const TypeLong* ft = join(kills)->isa_long();
+ if (ft == NULL || ft->_lo > ft->_hi)
+ return Type::TOP; // Canonical empty value
+ if (ft->_widen < this->_widen) {
+ // Do not allow the value of kill->_widen to affect the outcome.
+ // The widen bits must be allowed to run freely through the graph.
+ ft = TypeLong::make(ft->_lo, ft->_hi, this->_widen);
+ }
+ return ft;
+}
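+
+// Informally: the value range of the result comes from join(kills), but if
+// that join would lower the widen bits they are restored from 'this', so
+// widening progress is never lost by filtering against a kill type.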
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeLong::eq( const Type *t ) const {
+ const TypeLong *r = t->is_long(); // Handy access
+ return r->_lo == _lo && r->_hi == _hi && r->_widen == _widen;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeLong::hash(void) const {
+ return (int)(_lo+_hi+_widen+(int)Type::Long);
+}
+
+//------------------------------is_finite--------------------------------------
+// Has a finite value
+bool TypeLong::is_finite() const {
+ return true;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump TypeLong
+#ifndef PRODUCT
+static const char* longnamenear(jlong x, const char* xname, char* buf, jlong n) {
+ if (n > x) {
+ if (n >= x + 10000) return NULL;
+ sprintf(buf, "%s+" INT64_FORMAT, xname, n - x);
+ } else if (n < x) {
+ if (n <= x - 10000) return NULL;
+ sprintf(buf, "%s-" INT64_FORMAT, xname, x - n);
+ } else {
+ return xname;
+ }
+ return buf;
+}
+
+static const char* longname(char* buf, jlong n) {
+ const char* str;
+ if (n == min_jlong)
+ return "min";
+ else if (n < min_jlong + 10000)
+ sprintf(buf, "min+" INT64_FORMAT, n - min_jlong);
+ else if (n == max_jlong)
+ return "max";
+ else if (n > max_jlong - 10000)
+ sprintf(buf, "max-" INT64_FORMAT, max_jlong - n);
+ else if ((str = longnamenear(max_juint, "maxuint", buf, n)) != NULL)
+ return str;
+ else if ((str = longnamenear(max_jint, "maxint", buf, n)) != NULL)
+ return str;
+ else if ((str = longnamenear(min_jint, "minint", buf, n)) != NULL)
+ return str;
+ else
+ sprintf(buf, INT64_FORMAT, n);
+ return buf;
+}
+
+void TypeLong::dump2( Dict &d, uint depth, outputStream *st ) const {
+ char buf[80], buf2[80];
+ if (_lo == min_jlong && _hi == max_jlong)
+ st->print("long");
+ else if (is_con())
+ st->print("long:%s", longname(buf, get_con()));
+ else if (_hi == max_jlong)
+ st->print("long:>=%s", longname(buf, _lo));
+ else if (_lo == min_jlong)
+ st->print("long:<=%s", longname(buf, _hi));
+ else
+ st->print("long:%s..%s", longname(buf, _lo), longname(buf2, _hi));
+
+ if (_widen != 0 && this != TypeLong::LONG)
+ st->print(":%.*s", _widen, "wwww");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypeLong::singleton(void) const {
+ return _lo >= _hi;
+}
+
+bool TypeLong::empty(void) const {
+ return _lo > _hi;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeTuple *TypeTuple::IFBOTH; // Return both arms of IF as reachable
+const TypeTuple *TypeTuple::IFFALSE;
+const TypeTuple *TypeTuple::IFTRUE;
+const TypeTuple *TypeTuple::IFNEITHER;
+const TypeTuple *TypeTuple::LOOPBODY;
+const TypeTuple *TypeTuple::MEMBAR;
+const TypeTuple *TypeTuple::STORECONDITIONAL;
+const TypeTuple *TypeTuple::START_I2C;
+const TypeTuple *TypeTuple::INT_PAIR;
+const TypeTuple *TypeTuple::LONG_PAIR;
+
+
+//------------------------------make-------------------------------------------
+// Make a TypeTuple from the range of a method signature
+const TypeTuple *TypeTuple::make_range(ciSignature* sig) {
+ ciType* return_type = sig->return_type();
+ uint total_fields = TypeFunc::Parms + return_type->size();
+ const Type **field_array = fields(total_fields);
+ switch (return_type->basic_type()) {
+ case T_LONG:
+ field_array[TypeFunc::Parms] = TypeLong::LONG;
+ field_array[TypeFunc::Parms+1] = Type::HALF;
+ break;
+ case T_DOUBLE:
+ field_array[TypeFunc::Parms] = Type::DOUBLE;
+ field_array[TypeFunc::Parms+1] = Type::HALF;
+ break;
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_FLOAT:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ field_array[TypeFunc::Parms] = get_const_type(return_type);
+ break;
+ case T_VOID:
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ return (TypeTuple*)(new TypeTuple(total_fields,field_array))->hashcons();
+}
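+
+// Example: for a method returning a jlong, the range tuple carries
+// TypeLong::LONG in slot TypeFunc::Parms and Type::HALF in the following
+// slot; a void method contributes no return slots beyond the fixed header.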
+
+// Make a TypeTuple from the domain of a method signature
+const TypeTuple *TypeTuple::make_domain(ciInstanceKlass* recv, ciSignature* sig) {
+ uint total_fields = TypeFunc::Parms + sig->size();
+
+ uint pos = TypeFunc::Parms;
+ const Type **field_array;
+ if (recv != NULL) {
+ total_fields++;
+ field_array = fields(total_fields);
+ // Use get_const_type here because it respects UseUniqueSubclasses:
+ field_array[pos++] = get_const_type(recv)->join(TypePtr::NOTNULL);
+ } else {
+ field_array = fields(total_fields);
+ }
+
+ int i = 0;
+ while (pos < total_fields) {
+ ciType* type = sig->type_at(i);
+
+ switch (type->basic_type()) {
+ case T_LONG:
+ field_array[pos++] = TypeLong::LONG;
+ field_array[pos++] = Type::HALF;
+ break;
+ case T_DOUBLE:
+ field_array[pos++] = Type::DOUBLE;
+ field_array[pos++] = Type::HALF;
+ break;
+ case T_OBJECT:
+ case T_ARRAY:
+ case T_BOOLEAN:
+ case T_CHAR:
+ case T_FLOAT:
+ case T_BYTE:
+ case T_SHORT:
+ case T_INT:
+ field_array[pos++] = get_const_type(type);
+ break;
+ default:
+ ShouldNotReachHere();
+ }
+ i++;
+ }
+ return (TypeTuple*)(new TypeTuple(total_fields,field_array))->hashcons();
+}
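+
+// Example: for a non-static method, the receiver occupies the first
+// argument slot as get_const_type(recv) joined with TypePtr::NOTNULL, and
+// every long or double argument after it consumes two slots (the value
+// followed by Type::HALF), mirroring the JVM's two-word value convention.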
+
+const TypeTuple *TypeTuple::make( uint cnt, const Type **fields ) {
+ return (TypeTuple*)(new TypeTuple(cnt,fields))->hashcons();
+}
+
+//------------------------------fields-----------------------------------------
+// Subroutine call type with space allocated for argument types
+const Type **TypeTuple::fields( uint arg_cnt ) {
+ const Type **flds = (const Type **)(Compile::current()->type_arena()->Amalloc_4((TypeFunc::Parms+arg_cnt)*sizeof(Type*) ));
+ flds[TypeFunc::Control ] = Type::CONTROL;
+ flds[TypeFunc::I_O ] = Type::ABIO;
+ flds[TypeFunc::Memory ] = Type::MEMORY;
+ flds[TypeFunc::FramePtr ] = TypeRawPtr::BOTTOM;
+ flds[TypeFunc::ReturnAdr] = Type::RETURN_ADDRESS;
+
+ return flds;
+}
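+
+// Note: only the fixed header slots (Control, I_O, Memory, FramePtr,
+// ReturnAdr) are filled in here; the argument slots from TypeFunc::Parms
+// onward are left for the caller to populate.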
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeTuple::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Tuple
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Tuple: { // Meeting 2 signatures?
+ const TypeTuple *x = t->is_tuple();
+ assert( _cnt == x->_cnt, "" );
+ const Type **fields = (const Type **)(Compile::current()->type_arena()->Amalloc_4( _cnt*sizeof(Type*) ));
+ for( uint i=0; i<_cnt; i++ )
+ fields[i] = field_at(i)->xmeet( x->field_at(i) );
+ return TypeTuple::make(_cnt,fields);
+ }
+ case Top:
+ break;
+ }
+ return this; // Return self (meet with Top)
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeTuple::xdual() const {
+ const Type **fields = (const Type **)(Compile::current()->type_arena()->Amalloc_4( _cnt*sizeof(Type*) ));
+ for( uint i=0; i<_cnt; i++ )
+ fields[i] = _fields[i]->dual();
+ return new TypeTuple(_cnt,fields);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeTuple::eq( const Type *t ) const {
+ const TypeTuple *s = (const TypeTuple *)t;
+ if (_cnt != s->_cnt) return false; // Unequal field counts
+ for (uint i = 0; i < _cnt; i++)
+ if (field_at(i) != s->field_at(i)) // POINTER COMPARE! NO RECURSION!
+ return false; // Missed
+ return true;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeTuple::hash(void) const {
+ intptr_t sum = _cnt;
+ for( uint i=0; i<_cnt; i++ )
+ sum += (intptr_t)_fields[i]; // Hash on pointers directly
+ return sum;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump signature Type
+#ifndef PRODUCT
+void TypeTuple::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print("{");
+ if( !depth || d[this] ) { // Check for recursive print
+ st->print("...}");
+ return;
+ }
+ d.Insert((void*)this, (void*)this); // Stop recursion
+ if( _cnt ) {
+ uint i;
+ for( i=0; i<_cnt-1; i++ ) {
+ st->print("%d:", i);
+ _fields[i]->dump2(d, depth-1, st);
+ st->print(", ");
+ }
+ st->print("%d:", i);
+ _fields[i]->dump2(d, depth-1, st);
+ }
+ st->print("}");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeTuple::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeTuple::empty(void) const {
+ for( uint i=0; i<_cnt; i++ ) {
+ if (_fields[i]->empty()) return true;
+ }
+ return false;
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+
+inline const TypeInt* normalize_array_size(const TypeInt* size) {
+ // Certain normalizations keep us sane when comparing types.
+ // We do not want arrayOop variables to differ only by the wideness
+ // of their index types. Pick minimum wideness, since that is the
+ // forced wideness of small ranges anyway.
+ if (size->_widen != Type::WidenMin)
+ return TypeInt::make(size->_lo, size->_hi, Type::WidenMin);
+ else
+ return size;
+}
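+
+// Example: an array whose size type is [0..100] with a non-minimal widen is
+// rebuilt here as TypeInt::make(0,100,WidenMin), so two arrays whose length
+// types differ only in widen bits hash-cons to the same TypeAry.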
+
+//------------------------------make-------------------------------------------
+const TypeAry *TypeAry::make( const Type *elem, const TypeInt *size) {
+ size = normalize_array_size(size);
+ return (TypeAry*)(new TypeAry(elem,size))->hashcons();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeAry::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Ary
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Array: { // Meeting 2 arrays?
+ const TypeAry *a = t->is_ary();
+ return TypeAry::make(_elem->meet(a->_elem),
+ _size->xmeet(a->_size)->is_int());
+ }
+ case Top:
+ break;
+ }
+ return this; // Return self (meet with Top)
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeAry::xdual() const {
+ const TypeInt* size_dual = _size->dual()->is_int();
+ size_dual = normalize_array_size(size_dual);
+ return new TypeAry( _elem->dual(), size_dual);
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeAry::eq( const Type *t ) const {
+ const TypeAry *a = (const TypeAry*)t;
+ return _elem == a->_elem &&
+ _size == a->_size;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeAry::hash(void) const {
+ return (intptr_t)_elem + (intptr_t)_size;
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeAry::dump2( Dict &d, uint depth, outputStream *st ) const {
+ _elem->dump2(d, depth, st);
+ st->print("[");
+ _size->dump2(d, depth, st);
+ st->print("]");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeAry::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeAry::empty(void) const {
+ return _elem->empty() || _size->empty();
+}
+
+//--------------------------ary_must_be_exact----------------------------------
+bool TypeAry::ary_must_be_exact() const {
+ if (!UseExactTypes) return false;
+ // This logic looks at the element type of an array, and returns true
+ // if the element type is either a primitive or a final instance class.
+ // In such cases, an array built on this ary must have no subclasses.
+ if (_elem == BOTTOM) return false; // general array not exact
+ if (_elem == TOP ) return false; // inverted general array not exact
+ const TypeOopPtr* toop = _elem->isa_oopptr();
+ if (!toop) return true; // a primitive type, like int
+ ciKlass* tklass = toop->klass();
+ if (tklass == NULL) return false; // unloaded class
+ if (!tklass->is_loaded()) return false; // unloaded class
+ const TypeInstPtr* tinst = _elem->isa_instptr();
+ if (tinst) return tklass->as_instance_klass()->is_final();
+ const TypeAryPtr* tap = _elem->isa_aryptr();
+ if (tap) return tap->ary()->ary_must_be_exact();
+ return false;
+}
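+
+// Example: with UseExactTypes on, an array whose element type is a final
+// instance class (or a primitive such as int) must be exact, since no
+// subclass of the element can ever appear; an array of a non-final class
+// need not be.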
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypePtr *TypePtr::NULL_PTR;
+const TypePtr *TypePtr::NOTNULL;
+const TypePtr *TypePtr::BOTTOM;
+
+//------------------------------meet-------------------------------------------
+// Meet over the PTR enum
+const TypePtr::PTR TypePtr::ptr_meet[TypePtr::lastPTR][TypePtr::lastPTR] = {
+ // TopPTR, AnyNull, Constant, Null, NotNull, BotPTR,
+ { /* Top */ TopPTR, AnyNull, Constant, Null, NotNull, BotPTR,},
+ { /* AnyNull */ AnyNull, AnyNull, Constant, BotPTR, NotNull, BotPTR,},
+ { /* Constant*/ Constant, Constant, Constant, BotPTR, NotNull, BotPTR,},
+ { /* Null */ Null, BotPTR, BotPTR, Null, BotPTR, BotPTR,},
+ { /* NotNull */ NotNull, NotNull, NotNull, BotPTR, NotNull, BotPTR,},
+ { /* BotPTR */ BotPTR, BotPTR, BotPTR, BotPTR, BotPTR, BotPTR,}
+};
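+
+// Reading the table: it is symmetric in its two arguments and behaves like a
+// lattice meet, e.g. Null meet NotNull gives BotPTR (maybe-null vs never-null
+// tells us nothing), AnyNull meet NotNull gives NotNull, and TopPTR meet
+// anything gives that other thing back.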
+
+//------------------------------make-------------------------------------------
+const TypePtr *TypePtr::make( TYPES t, enum PTR ptr, int offset ) {
+ return (TypePtr*)(new TypePtr(t,ptr,offset))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypePtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == AnyPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(_base, ptr, _offset);
+}
+
+//------------------------------get_con----------------------------------------
+intptr_t TypePtr::get_con() const {
+ assert( _ptr == Null, "" );
+ return _offset;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypePtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is AnyPtr
+ switch (t->base()) { // switch on original type
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ const TypePtr *tp = t->is_ptr();
+ return make( AnyPtr, meet_ptr(tp->ptr()), meet_offset(tp->offset()) );
+ }
+ case RawPtr: // For these, flip the call around to cut down
+ case OopPtr:
+ case InstPtr: // on the cases I have to handle.
+ case KlassPtr:
+ case AryPtr:
+ return t->xmeet(this); // Call in reverse direction
+ default: // All else is a mistake
+ typerr(t);
+
+ }
+ return this;
+}
+
+//------------------------------meet_offset------------------------------------
+int TypePtr::meet_offset( int offset ) const {
+ // Either is 'TOP' offset? Return the other offset!
+ if( _offset == OffsetTop ) return offset;
+ if( offset == OffsetTop ) return _offset;
+ // If either is different, return 'BOTTOM' offset
+ if( _offset != offset ) return OffsetBot;
+ return _offset;
+}
+
+//------------------------------dual_offset------------------------------------
+int TypePtr::dual_offset( ) const {
+ if( _offset == OffsetTop ) return OffsetBot;// Map 'TOP' into 'BOTTOM'
+ if( _offset == OffsetBot ) return OffsetTop;// Map 'BOTTOM' into 'TOP'
+ return _offset; // Map everything else into self
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const TypePtr::PTR TypePtr::ptr_dual[TypePtr::lastPTR] = {
+ BotPTR, NotNull, Constant, Null, AnyNull, TopPTR
+};
+const Type *TypePtr::xdual() const {
+ return new TypePtr( AnyPtr, dual_ptr(), dual_offset() );
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypePtr::add_offset( int offset ) const {
+ if( offset == 0 ) return this; // No change
+ if( _offset == OffsetBot ) return this;
+ if( offset == OffsetBot ) offset = OffsetBot;
+ else if( _offset == OffsetTop || offset == OffsetTop ) offset = OffsetTop;
+ else offset += _offset;
+ return make( AnyPtr, _ptr, offset );
+}
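+
+// Informally: concrete offsets simply add, while OffsetBot is absorbing and
+// OffsetTop contaminates, e.g. adding 8 to an AnyPtr at offset 16 gives
+// offset 24, but adding anything to an OffsetBot pointer stays OffsetBot.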
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypePtr::eq( const Type *t ) const {
+ const TypePtr *a = (const TypePtr*)t;
+ return _ptr == a->ptr() && _offset == a->offset();
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypePtr::hash(void) const {
+ return _ptr + _offset;
+}
+
+//------------------------------dump2------------------------------------------
+const char *const TypePtr::ptr_msg[TypePtr::lastPTR] = {
+ "TopPTR","AnyNull","Constant","NULL","NotNull","BotPTR"
+};
+
+#ifndef PRODUCT
+void TypePtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _ptr == Null ) st->print("NULL");
+ else st->print("%s *", ptr_msg[_ptr]);
+ if( _offset == OffsetTop ) st->print("+top");
+ else if( _offset == OffsetBot ) st->print("+bot");
+ else if( _offset ) st->print("+%d", _offset);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypePtr::singleton(void) const {
+ // TopPTR, Null, AnyNull, Constant are all singletons
+ return (_offset != OffsetBot) && !below_centerline(_ptr);
+}
+
+bool TypePtr::empty(void) const {
+ return (_offset == OffsetTop) || above_centerline(_ptr);
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeRawPtr *TypeRawPtr::BOTTOM;
+const TypeRawPtr *TypeRawPtr::NOTNULL;
+
+//------------------------------make-------------------------------------------
+const TypeRawPtr *TypeRawPtr::make( enum PTR ptr ) {
+ assert( ptr != Constant, "what is the constant?" );
+ assert( ptr != Null, "Use TypePtr for NULL" );
+ return (TypeRawPtr*)(new TypeRawPtr(ptr,0))->hashcons();
+}
+
+const TypeRawPtr *TypeRawPtr::make( address bits ) {
+ assert( bits, "Use TypePtr for NULL" );
+ return (TypeRawPtr*)(new TypeRawPtr(Constant,bits))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeRawPtr::cast_to_ptr_type(PTR ptr) const {
+ assert( ptr != Constant, "what is the constant?" );
+ assert( ptr != Null, "Use TypePtr for NULL" );
+ assert( _bits==0, "Why cast a constant address?");
+ if( ptr == _ptr ) return this;
+ return make(ptr);
+}
+
+//------------------------------get_con----------------------------------------
+intptr_t TypeRawPtr::get_con() const {
+ assert( _ptr == Null || _ptr == Constant, "" );
+ return (intptr_t)_bits;
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeRawPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is RawPtr
+ switch( t->base() ) { // switch on original type
+ case Bottom: // Ye Olde Default
+ return t;
+ case Top:
+ return this;
+ case AnyPtr: // Meeting to AnyPtrs
+ break;
+ case RawPtr: { // might be top, bot, any/not or constant
+ enum PTR tptr = t->is_ptr()->ptr();
+ enum PTR ptr = meet_ptr( tptr );
+ if( ptr == Constant ) { // Cannot be equal constants, so...
+ if( tptr == Constant && _ptr != Constant) return t;
+ if( _ptr == Constant && tptr != Constant) return this;
+ ptr = NotNull; // Fall down in lattice
+ }
+ return make( ptr );
+ }
+
+ case OopPtr:
+ case InstPtr:
+ case KlassPtr:
+ case AryPtr:
+ return TypePtr::BOTTOM; // Oop meet raw is not well defined
+ default: // All else is a mistake
+ typerr(t);
+ }
+
+ // Found an AnyPtr type vs self-RawPtr type
+ const TypePtr *tp = t->is_ptr();
+ switch (tp->ptr()) {
+ case TypePtr::TopPTR: return this;
+ case TypePtr::BotPTR: return t;
+ case TypePtr::Null:
+ if( _ptr == TypePtr::TopPTR ) return t;
+ return TypeRawPtr::BOTTOM;
+ case TypePtr::NotNull: return TypePtr::make( AnyPtr, meet_ptr(TypePtr::NotNull), tp->meet_offset(0) );
+ case TypePtr::AnyNull:
+ if( _ptr == TypePtr::Constant) return this;
+ return make( meet_ptr(TypePtr::AnyNull) );
+ default: ShouldNotReachHere();
+ }
+ return this;
+}
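+
+// Informally: two raw pointers to different constant addresses cannot stay
+// Constant, so the meet above falls to NotNull; only meeting a constant with
+// a pointer above the centerline preserves the constant.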
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeRawPtr::xdual() const {
+ return new TypeRawPtr( dual_ptr(), _bits );
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeRawPtr::add_offset( int offset ) const {
+ if( offset == OffsetTop ) return BOTTOM; // Undefined offset-> undefined pointer
+ if( offset == OffsetBot ) return BOTTOM; // Unknown offset-> unknown pointer
+ if( offset == 0 ) return this; // No change
+ switch (_ptr) {
+ case TypePtr::TopPTR:
+ case TypePtr::BotPTR:
+ case TypePtr::NotNull:
+ return this;
+ case TypePtr::Null:
+ case TypePtr::Constant:
+ return make( _bits+offset );
+ default: ShouldNotReachHere();
+ }
+ return NULL; // Lint noise
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeRawPtr::eq( const Type *t ) const {
+ const TypeRawPtr *a = (const TypeRawPtr*)t;
+ return _bits == a->_bits && TypePtr::eq(t);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeRawPtr::hash(void) const {
+ return (intptr_t)_bits + TypePtr::hash();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeRawPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _ptr == Constant )
+ st->print(INTPTR_FORMAT, _bits);
+ else
+ st->print("rawptr:%s", ptr_msg[_ptr]);
+}
+#endif
+
+//=============================================================================
+// Convenience common pre-built type.
+const TypeOopPtr *TypeOopPtr::BOTTOM;
+
+//------------------------------make-------------------------------------------
+const TypeOopPtr *TypeOopPtr::make(PTR ptr,
+ int offset) {
+ assert(ptr != Constant, "no constant generic pointers");
+ ciKlass* k = ciKlassKlass::make();
+ bool xk = false;
+ ciObject* o = NULL;
+ return (TypeOopPtr*)(new TypeOopPtr(OopPtr, ptr, k, xk, o, offset, UNKNOWN_INSTANCE))->hashcons();
+}
+
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeOopPtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(ptr, _offset);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeOopPtr::cast_to_instance(int instance_id) const {
+ // There are no instances of a general oop.
+ // Return self unchanged.
+ return this;
+}
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeOopPtr::cast_to_exactness(bool klass_is_exact) const {
+ // There is no such thing as an exact general oop.
+ // Return self unchanged.
+ return this;
+}
+
+
+//------------------------------as_klass_type----------------------------------
+// Return the klass type corresponding to this instance or array type.
+// It is the type that is loaded from an object of this type.
+const TypeKlassPtr* TypeOopPtr::as_klass_type() const {
+ ciKlass* k = klass();
+ bool xk = klass_is_exact();
+ if (k == NULL || !k->is_java_klass())
+ return TypeKlassPtr::OBJECT;
+ else
+ return TypeKlassPtr::make(xk? Constant: NotNull, k, 0);
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeOopPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is OopPtr
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr:
+ return TypePtr::BOTTOM; // Oop meet raw is not well defined
+
+ case AnyPtr: {
+ // Found an AnyPtr type vs self-OopPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case Null:
+ if (ptr == Null) return TypePtr::make(AnyPtr, ptr, offset);
+ // else fall through:
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, offset);
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case OopPtr: { // Meeting to other OopPtrs
+ const TypeOopPtr *tp = t->is_oopptr();
+ return make( meet_ptr(tp->ptr()), meet_offset(tp->offset()) );
+ }
+
+ case InstPtr: // For these, flip the call around to cut down
+ case KlassPtr: // on the cases I have to handle.
+ case AryPtr:
+ return t->xmeet(this); // Call in reverse direction
+
+ } // End of switch
+ return this;
+}
+
+
+//------------------------------xdual------------------------------------------
+// Dual of a pure heap pointer. No relevant klass or oop information.
+const Type *TypeOopPtr::xdual() const {
+ assert(klass() == ciKlassKlass::make(), "no klasses here");
+ assert(const_oop() == NULL, "no constants here");
+ return new TypeOopPtr(_base, dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance() );
+}
+
+//--------------------------make_from_klass_common-----------------------------
+// Computes the element-type given a klass.
+const TypeOopPtr* TypeOopPtr::make_from_klass_common(ciKlass *klass, bool klass_change, bool try_for_exact) {
+ assert(klass->is_java_klass(), "must be java language klass");
+ if (klass->is_instance_klass()) {
+ Compile* C = Compile::current();
+ Dependencies* deps = C->dependencies();
+ assert((deps != NULL) == (C->method() != NULL && C->method()->code_size() > 0), "sanity");
+ // Element is an instance
+ bool klass_is_exact = false;
+ if (klass->is_loaded()) {
+ // Try to set klass_is_exact.
+ ciInstanceKlass* ik = klass->as_instance_klass();
+ klass_is_exact = ik->is_final();
+ if (!klass_is_exact && klass_change
+ && deps != NULL && UseUniqueSubclasses) {
+ ciInstanceKlass* sub = ik->unique_concrete_subklass();
+ if (sub != NULL) {
+ deps->assert_abstract_with_unique_concrete_subtype(ik, sub);
+ klass = ik = sub;
+ klass_is_exact = sub->is_final();
+ }
+ }
+ if (!klass_is_exact && try_for_exact
+ && deps != NULL && UseExactTypes) {
+ if (!ik->is_interface() && !ik->has_subklass()) {
+ // Add a dependence; if concrete subclass added we need to recompile
+ deps->assert_leaf_type(ik);
+ klass_is_exact = true;
+ }
+ }
+ }
+ return TypeInstPtr::make(TypePtr::BotPTR, klass, klass_is_exact, NULL, 0);
+ } else if (klass->is_obj_array_klass()) {
+ // Element is an object array. Recursively call ourself.
+ const TypeOopPtr *etype = TypeOopPtr::make_from_klass_common(klass->as_obj_array_klass()->element_klass(), false, try_for_exact);
+ bool xk = etype->klass_is_exact();
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ // We used to pass NotNull in here, asserting that the sub-arrays
+ // are all not-null. This is not true in general, as code can
+ // slam NULLs down in the subarrays.
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::BotPTR, arr0, klass, xk, 0);
+ return arr;
+ } else if (klass->is_type_array_klass()) {
+ // Element is a typeArray
+ const Type* etype = get_const_basic_type(klass->as_type_array_klass()->element_type());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ // We used to pass NotNull in here, asserting that the array pointer
+ // is not-null. That was not true in general.
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::BotPTR, arr0, klass, true, 0);
+ return arr;
+ } else {
+ ShouldNotReachHere();
+ return NULL;
+ }
+}
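+
+// Example: a loaded final instance class yields
+// TypeInstPtr::make(TypePtr::BotPTR, k, true, NULL, 0), i.e. an exact klass;
+// a non-final class stays inexact unless UseUniqueSubclasses or
+// UseExactTypes allow a compile-time dependency to be recorded above.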
+
+//------------------------------make_from_constant-----------------------------
+// Make a java pointer from an oop constant
+const TypeOopPtr* TypeOopPtr::make_from_constant(ciObject* o) {
+ if (o->is_method_data() || o->is_method()) {
+ // Treat much like a typeArray of bytes, like below, but fake the type...
+ assert(o->has_encoding(), "must be a perm space object");
+ const Type* etype = (Type*)get_const_basic_type(T_BYTE);
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::POS);
+ ciKlass *klass = ciTypeArrayKlass::make((BasicType) T_BYTE);
+ assert(o->has_encoding(), "method data oops should be tenured");
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ } else {
+ assert(o->is_java_object(), "must be java language object");
+ assert(!o->is_null_object(), "null object not yet handled here.");
+ ciKlass *klass = o->klass();
+ if (klass->is_instance_klass()) {
+ // Element is an instance
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeInstPtr::make(TypePtr::NotNull, klass, true, NULL, 0);
+ }
+ return TypeInstPtr::make(o);
+ } else if (klass->is_obj_array_klass()) {
+ // Element is an object array. Recursively call ourself.
+ const Type *etype =
+ TypeOopPtr::make_from_klass_raw(klass->as_obj_array_klass()->element_klass());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
+ // We used to pass NotNull in here, asserting that the sub-arrays
+ // are all not-null. This is not true in general, as code can
+ // slam NULLs down in the subarrays.
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
+ }
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ } else if (klass->is_type_array_klass()) {
+ // Element is a typeArray
+ const Type* etype =
+ (Type*)get_const_basic_type(klass->as_type_array_klass()->element_type());
+ const TypeAry* arr0 = TypeAry::make(etype, TypeInt::make(o->as_array()->length()));
+ // We used to pass NotNull in here, asserting that the array pointer
+ // is not-null. That was not true in general.
+ if (!o->has_encoding()) { // not a perm-space constant
+ // %%% remove this restriction by rewriting non-perm ConPNodes in a later phase
+ return TypeAryPtr::make(TypePtr::NotNull, arr0, klass, true, 0);
+ }
+ const TypeAryPtr* arr = TypeAryPtr::make(TypePtr::Constant, o, arr0, klass, true, 0);
+ return arr;
+ }
+ }
+
+ ShouldNotReachHere();
+ return NULL;
+}
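+
+// Informally: a perm-space instance constant (o->has_encoding()) becomes a
+// Constant TypeInstPtr carrying the oop itself, while a non-perm constant
+// degrades to a NotNull, exact instance type until the restriction noted in
+// the %%% comments above is lifted.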
+
+//------------------------------get_con----------------------------------------
+intptr_t TypeOopPtr::get_con() const {
+ assert( _ptr == Null || _ptr == Constant, "" );
+ assert( _offset >= 0, "" );
+
+ if (_offset != 0) {
+ // After being ported to the compiler interface, the compiler no longer
+ // directly manipulates the addresses of oops. Rather, it only has a pointer
+ // to a handle at compile time. This handle is embedded in the generated
+ // code and dereferenced at the time the nmethod is made. Until that time,
+ // it is not reasonable to do arithmetic with the addresses of oops (we don't
+ // have access to the addresses!). This does not seem to currently happen,
+ // but this assertion here is to help prevent its occurrence.
+ tty->print_cr("Found oop constant with non-zero offset");
+ ShouldNotReachHere();
+ }
+
+ return (intptr_t)const_oop()->encoding();
+}
+
+
+//-----------------------------filter------------------------------------------
+// Do not allow interface-vs.-noninterface joins to collapse to top.
+const Type *TypeOopPtr::filter( const Type *kills ) const {
+
+ const Type* ft = join(kills);
+ const TypeInstPtr* ftip = ft->isa_instptr();
+ const TypeInstPtr* ktip = kills->isa_instptr();
+
+ if (ft->empty()) {
+ // Check for evil case of 'this' being a class and 'kills' expecting an
+ // interface. This can happen because the bytecodes do not contain
+ // enough type info to distinguish a Java-level interface variable
+ // from a Java-level object variable. If we meet 2 classes which
+ // both implement interface I, but their meet is at 'j/l/O' which
+ // doesn't implement I, we have no way to tell if the result should
+ // be 'I' or 'j/l/O'. Thus we'll pick 'j/l/O'. If this then flows
+ // into a Phi which "knows" it's an Interface type we'll have to
+ // uplift the type.
+ if (!empty() && ktip != NULL && ktip->is_loaded() && ktip->klass()->is_interface())
+ return kills; // Uplift to interface
+
+ return Type::TOP; // Canonical empty value
+ }
+
+ // If we have an interface-typed Phi or cast and we narrow to a class type,
+ // the join should report back the class. However, if we have a J/L/Object
+ // class-typed Phi and an interface flows in, it's possible that the meet &
+ // join report an interface back out. This should not be possible, but it happens
+ // because the type system doesn't interact well with interfaces.
+ if (ftip != NULL && ktip != NULL &&
+ ftip->is_loaded() && ftip->klass()->is_interface() &&
+ ktip->is_loaded() && !ktip->klass()->is_interface()) {
+ // Happens in a CTW of rt.jar, 320-341, no extra flags
+ return ktip->cast_to_ptr_type(ftip->ptr());
+ }
+
+ return ft;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeOopPtr::eq( const Type *t ) const {
+ const TypeOopPtr *a = (const TypeOopPtr*)t;
+ if (_klass_is_exact != a->_klass_is_exact ||
+ _instance_id != a->_instance_id) return false;
+ ciObject* one = const_oop();
+ ciObject* two = a->const_oop();
+ if (one == NULL || two == NULL) {
+ return (one == two) && TypePtr::eq(t);
+ } else {
+ return one->equals(two) && TypePtr::eq(t);
+ }
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeOopPtr::hash(void) const {
+ return
+ (const_oop() ? const_oop()->hash() : 0) +
+ _klass_is_exact +
+ _instance_id +
+ TypePtr::hash();
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeOopPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ st->print("oopptr:%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ if( const_oop() ) st->print(INTPTR_FORMAT, const_oop());
+ switch( _offset ) {
+ case OffsetTop: st->print("+top"); break;
+ case OffsetBot: st->print("+any"); break;
+ case 0: break;
+ default: st->print("+%d",_offset); break;
+ }
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants
+bool TypeOopPtr::singleton(void) const {
+ // detune optimizer to not generate constant oop + constant offset as a constant!
+ // TopPTR, Null, AnyNull, Constant are all singletons
+ return (_offset == 0) && !below_centerline(_ptr);
+}
+
+//------------------------------xadd_offset------------------------------------
+int TypeOopPtr::xadd_offset( int offset ) const {
+ // Adding to 'TOP' offset? Return 'TOP'!
+ if( _offset == OffsetTop || offset == OffsetTop ) return OffsetTop;
+ // Adding to 'BOTTOM' offset? Return 'BOTTOM'!
+ if( _offset == OffsetBot || offset == OffsetBot ) return OffsetBot;
+
+ // assert( _offset >= 0 && _offset+offset >= 0, "" );
+ // It is possible to construct a negative offset during PhaseCCP
+
+ return _offset+offset; // Sum valid offsets
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeOopPtr::add_offset( int offset ) const {
+ return make( _ptr, xadd_offset(offset) );
+}
+
+int TypeOopPtr::meet_instance(int iid) const {
+ if (iid == 0) {
+ return (_instance_id < 0) ? _instance_id : UNKNOWN_INSTANCE;
+ } else if (_instance_id == UNKNOWN_INSTANCE) {
+ return (iid < 0) ? iid : UNKNOWN_INSTANCE;
+ } else {
+ return (_instance_id == iid) ? iid : UNKNOWN_INSTANCE;
+ }
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeInstPtr *TypeInstPtr::NOTNULL;
+const TypeInstPtr *TypeInstPtr::BOTTOM;
+const TypeInstPtr *TypeInstPtr::MIRROR;
+const TypeInstPtr *TypeInstPtr::MARK;
+const TypeInstPtr *TypeInstPtr::KLASS;
+
+//------------------------------TypeInstPtr-------------------------------------
+TypeInstPtr::TypeInstPtr(PTR ptr, ciKlass* k, bool xk, ciObject* o, int off, int instance_id)
+ : TypeOopPtr(InstPtr, ptr, k, xk, o, off, instance_id), _name(k->name()) {
+ assert(k != NULL &&
+ (k->is_loaded() || o == NULL),
+ "cannot have constants with non-loaded klass");
+};
+
+//------------------------------make-------------------------------------------
+const TypeInstPtr *TypeInstPtr::make(PTR ptr,
+ ciKlass* k,
+ bool xk,
+ ciObject* o,
+ int offset,
+ int instance_id) {
+ assert( !k->is_loaded() || k->is_instance_klass() ||
+ k->is_method_klass(), "Must be for instance or method");
+ // Either const_oop() is NULL or else ptr is Constant
+ assert( (!o && ptr != Constant) || (o && ptr == Constant),
+ "constant pointers must have a value supplied" );
+ // Ptr is never Null
+ assert( ptr != Null, "NULL pointers are not typed" );
+
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = false;
+ if (ptr == Constant) {
+ // Note: This case includes meta-object constants, such as methods.
+ xk = true;
+ } else if (k->is_loaded()) {
+ ciInstanceKlass* ik = k->as_instance_klass();
+ if (!xk && ik->is_final()) xk = true; // no inexact final klass
+ if (xk && ik->is_interface()) xk = false; // no exact interface
+ }
+
+ // Now hash this baby
+ TypeInstPtr *result =
+ (TypeInstPtr*)(new TypeInstPtr(ptr, k, xk, o ,offset, instance_id))->hashcons();
+
+ return result;
+}
+
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeInstPtr::cast_to_ptr_type(PTR ptr) const {
+ if( ptr == _ptr ) return this;
+ // There is no need to reconstruct _sig info here; with later lazy
+ // construction, _sig will show up on demand.
+ return make(ptr, klass(), klass_is_exact(), const_oop(), _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeInstPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ if (!_klass->is_loaded()) return this;
+ ciInstanceKlass* ik = _klass->as_instance_klass();
+ if( (ik->is_final() || _const_oop) ) return this; // cannot clear xk
+ if( ik->is_interface() ) return this; // cannot set xk
+ return make(ptr(), klass(), klass_is_exact, const_oop(), _offset, _instance_id);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeInstPtr::cast_to_instance(int instance_id) const {
+ if( instance_id == _instance_id) return this;
+ bool exact = (instance_id == UNKNOWN_INSTANCE) ? _klass_is_exact : true;
+
+ return make(ptr(), klass(), exact, const_oop(), _offset, instance_id);
+}
+
+//------------------------------xmeet_unloaded---------------------------------
+// Compute the MEET of two InstPtrs when at least one is unloaded.
+// Assume classes are different since called after check for same name/class-loader
+const TypeInstPtr *TypeInstPtr::xmeet_unloaded(const TypeInstPtr *tinst) const {
+ int off = meet_offset(tinst->offset());
+ PTR ptr = meet_ptr(tinst->ptr());
+
+ const TypeInstPtr *loaded = is_loaded() ? this : tinst;
+ const TypeInstPtr *unloaded = is_loaded() ? tinst : this;
+ if( loaded->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ //
+ // Meet unloaded class with java/lang/Object
+ //
+ // Meet
+ // | Unloaded Class
+ // Object | TOP | AnyNull | Constant | NotNull | BOTTOM |
+ // ===================================================================
+ // TOP | ..........................Unloaded......................|
+ // AnyNull | U-AN |................Unloaded......................|
+ // Constant | ... O-NN .................................. | O-BOT |
+ // NotNull | ... O-NN .................................. | O-BOT |
+ // BOTTOM | ........................Object-BOTTOM ..................|
+ //
+ assert(loaded->ptr() != TypePtr::Null, "insanity check");
+ //
+ if( loaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
+ else if (loaded->ptr() == TypePtr::AnyNull) { return TypeInstPtr::make( ptr, unloaded->klass() ); }
+ else if (loaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
+ else if (loaded->ptr() == TypePtr::Constant || loaded->ptr() == TypePtr::NotNull) {
+ if (unloaded->ptr() == TypePtr::BotPTR ) { return TypeInstPtr::BOTTOM; }
+ else { return TypeInstPtr::NOTNULL; }
+ }
+ else if( unloaded->ptr() == TypePtr::TopPTR ) { return unloaded; }
+
+ return unloaded->cast_to_ptr_type(TypePtr::AnyNull)->is_instptr();
+ }
+
+ // Both are unloaded, not the same class, not Object
+ // Or meet unloaded with a different loaded class, not java/lang/Object
+ if( ptr != TypePtr::BotPTR ) {
+ return TypeInstPtr::NOTNULL;
+ }
+ return TypeInstPtr::BOTTOM;
+}
+
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeInstPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case AryPtr: { // All arrays inherit from Object class
+ const TypeAryPtr *tp = t->is_aryptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ int iid = meet_instance(tp->instance_id());
+ switch (ptr) {
+ case TopPTR:
+ case AnyNull: // Fall 'down' to dual of object klass
+ if (klass()->equals(ciEnv::current()->Object_klass())) {
+ return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, iid);
+ } else {
+ // cannot subclass, so the meet has to fall badly below the centerline
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, iid);
+ }
+ case Constant:
+ case NotNull:
+ case BotPTR: // Fall down to object klass
+ // LCA is object_klass, but if we subclass from the top we can do better
+ if( above_centerline(_ptr) ) { // if( _ptr == TopPTR || _ptr == AnyNull )
+ // If 'this' (InstPtr) is above the centerline and it is Object class
+ // then we can subclass in the Java class hierarchy.
+ if (klass()->equals(ciEnv::current()->Object_klass())) {
+ // that is, tp's array type is a subtype of my klass
+ return TypeAryPtr::make(ptr, tp->ary(), tp->klass(), tp->klass_is_exact(), offset, iid);
+ }
+ }
+ // The other case cannot happen, since I cannot be a subtype of an array.
+ // The meet falls down to Object class below centerline.
+ if( ptr == Constant )
+ ptr = NotNull;
+ return make( ptr, ciEnv::current()->Object_klass(), false, NULL, offset, iid );
+ default: typerr(t);
+ }
+ }
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-InstPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, klass(), klass_is_exact(),
+ (ptr == Constant ? const_oop() : NULL), offset);
+ case NotNull:
+ case BotPTR:
+ return TypeOopPtr::make(ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ // Found an AnyPtr type vs self-InstPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case Null:
+ if( ptr == Null ) return TypePtr::make( AnyPtr, ptr, offset );
+ case TopPTR:
+ case AnyNull:
+ return make( ptr, klass(), klass_is_exact(),
+ (ptr == Constant ? const_oop() : NULL), offset );
+ case NotNull:
+ case BotPTR:
+ return TypePtr::make( AnyPtr, ptr, offset );
+ default: typerr(t);
+ }
+ }
+
+ /*
+ A-top }
+ / | \ } Tops
+ B-top A-any C-top }
+ | / | \ | } Any-nulls
+ B-any | C-any }
+ | | |
+ B-con A-con C-con } constants; not comparable across classes
+ | | |
+ B-not | C-not }
+ | \ | / | } not-nulls
+ B-bot A-not C-bot }
+ \ | / } Bottoms
+ A-bot }
+ */
+
+ case InstPtr: { // Meeting 2 Oops?
+ // Found an InstPtr sub-type vs self-InstPtr type
+ const TypeInstPtr *tinst = t->is_instptr();
+ int off = meet_offset( tinst->offset() );
+ PTR ptr = meet_ptr( tinst->ptr() );
+ int instance_id = meet_instance(tinst->instance_id());
+
+ // Check for easy case; klasses are equal (and perhaps not loaded!)
+ // If we have constants, then we created oops so classes are loaded
+ // and we can handle the constants further down. This case handles
+ // both-not-loaded or both-loaded classes
+ if (ptr != Constant && klass()->equals(tinst->klass()) && klass_is_exact() == tinst->klass_is_exact()) {
+ return make( ptr, klass(), klass_is_exact(), NULL, off, instance_id );
+ }
+
+ // Classes require inspection in the Java klass hierarchy. Must be loaded.
+ ciKlass* tinst_klass = tinst->klass();
+ ciKlass* this_klass = this->klass();
+ bool tinst_xk = tinst->klass_is_exact();
+ bool this_xk = this->klass_is_exact();
+ if (!tinst_klass->is_loaded() || !this_klass->is_loaded() ) {
+ // One of these classes has not been loaded
+ const TypeInstPtr *unloaded_meet = xmeet_unloaded(tinst);
+#ifndef PRODUCT
+ if( PrintOpto && Verbose ) {
+ tty->print("meet of unloaded classes resulted in: "); unloaded_meet->dump(); tty->cr();
+ tty->print(" this == "); this->dump(); tty->cr();
+ tty->print(" tinst == "); tinst->dump(); tty->cr();
+ }
+#endif
+ return unloaded_meet;
+ }
+
+ // Handle mixing oops and interfaces first.
+ if( this_klass->is_interface() && !tinst_klass->is_interface() ) {
+ ciKlass *tmp = tinst_klass; // Swap interface around
+ tinst_klass = this_klass;
+ this_klass = tmp;
+ bool tmp2 = tinst_xk;
+ tinst_xk = this_xk;
+ this_xk = tmp2;
+ }
+ if (tinst_klass->is_interface() &&
+ !(this_klass->is_interface() ||
+ // Treat java/lang/Object as an honorary interface,
+ // because we need a bottom for the interface hierarchy.
+ this_klass == ciEnv::current()->Object_klass())) {
+ // Oop meets interface!
+
+ // See if the oop subtypes (implements) interface.
+ ciKlass *k;
+ bool xk;
+ if( this_klass->is_subtype_of( tinst_klass ) ) {
+ // Oop indeed subtypes. Now keep oop or interface depending
+ // on whether we are both above the centerline or either is
+ // below the centerline. If we are on the centerline
+ // (e.g., Constant vs. AnyNull interface), use the constant.
+ k = below_centerline(ptr) ? tinst_klass : this_klass;
+ // If we are keeping this_klass, keep its exactness too.
+ xk = below_centerline(ptr) ? tinst_xk : this_xk;
+ } else { // Does not implement, fall to Object
+ // Oop does not implement interface, so mixing falls to Object
+ // just like the verifier does (if both are above the
+ // centerline fall to interface)
+ k = above_centerline(ptr) ? tinst_klass : ciEnv::current()->Object_klass();
+ xk = above_centerline(ptr) ? tinst_xk : false;
+ // Watch out for Constant vs. AnyNull interface.
+ if (ptr == Constant) ptr = NotNull; // forget it was a constant
+ }
+ ciObject* o = NULL; // the Constant value, if any
+ if (ptr == Constant) {
+ // Find out which constant.
+ o = (this_klass == klass()) ? const_oop() : tinst->const_oop();
+ }
+ return make( ptr, k, xk, o, off );
+ }
+
+ // Either oop vs oop or interface vs interface or interface vs Object
+
+ // !!! Here's how the symmetry requirement breaks down into invariants:
+ // If we split one up & one down AND they subtype, take the down man.
+ // If we split one up & one down AND they do NOT subtype, "fall hard".
+ // If both are up and they subtype, take the subtype class.
+ // If both are up and they do NOT subtype, "fall hard".
+ // If both are down and they subtype, take the supertype class.
+ // If both are down and they do NOT subtype, "fall hard".
+ // Constants treated as down.
+
+ // Now, reorder the above list; observe that both-down+subtype is also
+ // "fall hard"; "fall hard" becomes the default case:
+ // If we split one up & one down AND they subtype, take the down man.
+ // If both are up and they subtype, take the subtype class.
+
+ // If both are down and they subtype, "fall hard".
+ // If both are down and they do NOT subtype, "fall hard".
+ // If both are up and they do NOT subtype, "fall hard".
+ // If we split one up & one down AND they do NOT subtype, "fall hard".
+
+ // If a proper subtype is exact, and we return it, we return it exactly.
+ // If a proper supertype is exact, there can be no subtyping relationship!
+ // If both types are equal to the subtype, exactness is and-ed below the
+ // centerline and or-ed above it. (N.B. Constants are always exact.)
+
+ // Check for subtyping:
+ ciKlass *subtype = NULL;
+ bool subtype_exact = false;
+ if( tinst_klass->equals(this_klass) ) {
+ subtype = this_klass;
+ subtype_exact = below_centerline(ptr) ? (this_xk & tinst_xk) : (this_xk | tinst_xk);
+ } else if( !tinst_xk && this_klass->is_subtype_of( tinst_klass ) ) {
+ subtype = this_klass; // Pick subtyping class
+ subtype_exact = this_xk;
+ } else if( !this_xk && tinst_klass->is_subtype_of( this_klass ) ) {
+ subtype = tinst_klass; // Pick subtyping class
+ subtype_exact = tinst_xk;
+ }
+
+ if( subtype ) {
+ if( above_centerline(ptr) ) { // both are up?
+ this_klass = tinst_klass = subtype;
+ this_xk = tinst_xk = subtype_exact;
+ } else if( above_centerline(this ->_ptr) && !above_centerline(tinst->_ptr) ) {
+ this_klass = tinst_klass; // tinst is down; keep down man
+ this_xk = tinst_xk;
+ } else if( above_centerline(tinst->_ptr) && !above_centerline(this ->_ptr) ) {
+ tinst_klass = this_klass; // this is down; keep down man
+ tinst_xk = this_xk;
+ } else {
+ this_xk = subtype_exact; // either they are equal, or we'll do an LCA
+ }
+ }
+
+ // Check for classes now being equal
+ if (tinst_klass->equals(this_klass)) {
+ // If the klasses are equal, the constants may still differ. Fall to
+ // NotNull if they do (neither constant is NULL; that is a special case
+ // handled elsewhere).
+ ciObject* o = NULL; // Assume not constant when done
+ ciObject* this_oop = const_oop();
+ ciObject* tinst_oop = tinst->const_oop();
+ if( ptr == Constant ) {
+ if (this_oop != NULL && tinst_oop != NULL &&
+ this_oop->equals(tinst_oop) )
+ o = this_oop;
+ else if (above_centerline(this ->_ptr))
+ o = tinst_oop;
+ else if (above_centerline(tinst ->_ptr))
+ o = this_oop;
+ else
+ ptr = NotNull;
+ }
+ return make( ptr, this_klass, this_xk, o, off, instance_id );
+ } // Else classes are not equal
+
+ // Since klasses are different, we require a LCA in the Java
+ // class hierarchy - which means we have to fall to at least NotNull.
+ if( ptr == TopPTR || ptr == AnyNull || ptr == Constant )
+ ptr = NotNull;
+
+ // Now we find the LCA of Java classes
+ ciKlass* k = this_klass->least_common_ancestor(tinst_klass);
+ return make( ptr, k, false, NULL, off );
+ } // End of case InstPtr
+
+ case KlassPtr:
+ return TypeInstPtr::BOTTOM;
+
+ } // End of switch
+ return this;
+}
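+
+// Informal examples of the InstPtr-vs-InstPtr case above (assuming loaded,
+// non-interface classes): meeting NotNull references to two unrelated
+// classes falls to NotNull of their least common ancestor (ultimately
+// java/lang/Object); meeting a class with one of its subclasses keeps the
+// subclass when both inputs are above the centerline, and the superclass
+// when both are below it.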
+
+
+//------------------------java_mirror_type--------------------------------------
+ciType* TypeInstPtr::java_mirror_type() const {
+ // must be a singleton type
+ if( const_oop() == NULL ) return NULL;
+
+ // must be of type java.lang.Class
+ if( klass() != ciEnv::current()->Class_klass() ) return NULL;
+
+ return const_oop()->as_instance()->java_mirror_type();
+}
+
+
+//------------------------------xdual------------------------------------------
+// Dual: do NOT dual on klasses. This means I do NOT understand the Java
+// inheritance mechanism.
+const Type *TypeInstPtr::xdual() const {
+ return new TypeInstPtr( dual_ptr(), klass(), klass_is_exact(), const_oop(), dual_offset(), dual_instance() );
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeInstPtr::eq( const Type *t ) const {
+ const TypeInstPtr *p = t->is_instptr();
+ return
+ klass()->equals(p->klass()) &&
+ TypeOopPtr::eq(p); // Check sub-type stuff
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeInstPtr::hash(void) const {
+ int hash = klass()->hash() + TypeOopPtr::hash();
+ return hash;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump oop Type
+#ifndef PRODUCT
+void TypeInstPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ // Print the name of the klass.
+ klass()->print_name_on(st);
+
+ switch( _ptr ) {
+ case Constant:
+ // TO DO: Make CI print the hex address of the underlying oop.
+ if (WizardMode || Verbose) {
+ const_oop()->print_oop(st);
+ }
+ case BotPTR:
+ if (!WizardMode && !Verbose) {
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+ case TopPTR:
+ case AnyNull:
+ case NotNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ if( _offset ) { // Dump offset, if any
+ if( _offset == OffsetBot ) st->print("+any");
+ else if( _offset == OffsetTop ) st->print("+unknown");
+ else st->print("+%d", _offset);
+ }
+
+ st->print(" *");
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+}
+#endif
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeInstPtr::add_offset( int offset ) const {
+ return make( _ptr, klass(), klass_is_exact(), const_oop(), xadd_offset(offset), _instance_id );
+}
+
+//=============================================================================
+// Convenience common pre-built types.
+const TypeAryPtr *TypeAryPtr::RANGE;
+const TypeAryPtr *TypeAryPtr::OOPS;
+const TypeAryPtr *TypeAryPtr::BYTES;
+const TypeAryPtr *TypeAryPtr::SHORTS;
+const TypeAryPtr *TypeAryPtr::CHARS;
+const TypeAryPtr *TypeAryPtr::INTS;
+const TypeAryPtr *TypeAryPtr::LONGS;
+const TypeAryPtr *TypeAryPtr::FLOATS;
+const TypeAryPtr *TypeAryPtr::DOUBLES;
+
+//------------------------------make-------------------------------------------
+const TypeAryPtr *TypeAryPtr::make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) {
+ assert(!(k == NULL && ary->_elem->isa_int()),
+ "integral arrays must be pre-equipped with a class");
+ if (!xk) xk = ary->ary_must_be_exact();
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = (ptr == Constant);
+ return (TypeAryPtr*)(new TypeAryPtr(ptr, NULL, ary, k, xk, offset, instance_id))->hashcons();
+}
+
+//------------------------------make-------------------------------------------
+const TypeAryPtr *TypeAryPtr::make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) {
+ assert(!(k == NULL && ary->_elem->isa_int()),
+ "integral arrays must be pre-equipped with a class");
+ assert( (ptr==Constant && o) || (ptr!=Constant && !o), "" );
+ if (!xk) xk = (o != NULL) || ary->ary_must_be_exact();
+ if (instance_id != UNKNOWN_INSTANCE)
+ xk = true; // instances are always exactly typed
+ if (!UseExactTypes) xk = (ptr == Constant);
+ return (TypeAryPtr*)(new TypeAryPtr(ptr, o, ary, k, xk, offset, instance_id))->hashcons();
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeAryPtr::cast_to_ptr_type(PTR ptr) const {
+ if( ptr == _ptr ) return this;
+ return make(ptr, const_oop(), _ary, klass(), klass_is_exact(), _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeAryPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ if (_ary->ary_must_be_exact()) return this; // cannot clear xk
+ return make(ptr(), const_oop(), _ary, klass(), klass_is_exact, _offset, _instance_id);
+}
+
+//-----------------------------cast_to_instance-------------------------------
+const TypeOopPtr *TypeAryPtr::cast_to_instance(int instance_id) const {
+ if( instance_id == _instance_id) return this;
+ bool exact = (instance_id == UNKNOWN_INSTANCE) ? _klass_is_exact : true;
+ return make(ptr(), const_oop(), _ary, klass(), exact, _offset, instance_id);
+}
+
+//-----------------------------narrow_size_type-------------------------------
+// Local cache for arrayOopDesc::max_array_length(etype),
+// which is kind of slow (and cached elsewhere by other users).
+static jint max_array_length_cache[T_CONFLICT+1];
+static jint max_array_length(BasicType etype) {
+ jint& cache = max_array_length_cache[etype];
+ jint res = cache;
+ if (res == 0) {
+ switch (etype) {
+ case T_CONFLICT:
+ case T_ILLEGAL:
+ case T_VOID:
+ etype = T_BYTE; // will produce conservatively high value
+ }
+ cache = res = arrayOopDesc::max_array_length(etype);
+ }
+ return res;
+}
+
+// Narrow the given size type to the index range for the given array base type.
+// Return NULL if the resulting int type becomes empty.
+const TypeInt* TypeAryPtr::narrow_size_type(const TypeInt* size, BasicType elem) {
+ jint hi = size->_hi;
+ jint lo = size->_lo;
+ jint min_lo = 0;
+ jint max_hi = max_array_length(elem);
+ //if (index_not_size) --max_hi; // type of a valid array index, FTR
+ bool chg = false;
+ if (lo < min_lo) { lo = min_lo; chg = true; }
+ if (hi > max_hi) { hi = max_hi; chg = true; }
+ if (lo > hi)
+ return NULL;
+ if (!chg)
+ return size;
+ return TypeInt::make(lo, hi, Type::WidenMin);
+}
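+
+// Editor's note: an illustrative, standalone restatement (not part of the original
+// sources) of the clamping that narrow_size_type performs: the size interval is pushed
+// into [0, max_array_length] and an empty result corresponds to the NULL return above.
+// ClampedRange and clamp_size_range are invented names using plain ints.
+struct ClampedRange { bool empty; int lo; int hi; };
+static ClampedRange clamp_size_range(int lo, int hi, int max_len) {
+  ClampedRange r = { false, lo, hi };
+  if (r.lo < 0)       r.lo = 0;        // an array size can never be negative
+  if (r.hi > max_len) r.hi = max_len;  // nor can it exceed the maximum array length
+  if (r.lo > r.hi)    r.empty = true;  // bounds crossed: the range is empty
+  return r;
+}
+// e.g. clamp_size_range(-5, 10, 8) yields [0,8]; clamp_size_range(9, 4, 8) is empty.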
+
+//-------------------------------cast_to_size----------------------------------
+const TypeAryPtr* TypeAryPtr::cast_to_size(const TypeInt* new_size) const {
+ assert(new_size != NULL, "");
+ new_size = narrow_size_type(new_size, elem()->basic_type());
+ if (new_size == NULL) // Negative length arrays will produce weird
+ new_size = TypeInt::ZERO; // intermediate dead fast-path goo
+ if (new_size == size()) return this;
+ const TypeAry* new_ary = TypeAry::make(elem(), new_size);
+ return make(ptr(), const_oop(), new_ary, klass(), klass_is_exact(), _offset);
+}
+
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeAryPtr::eq( const Type *t ) const {
+ const TypeAryPtr *p = t->is_aryptr();
+ return
+ _ary == p->_ary && // Check array
+ TypeOopPtr::eq(p); // Check sub-parts
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeAryPtr::hash(void) const {
+ return (intptr_t)_ary + TypeOopPtr::hash();
+}
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeAryPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ // Mixing ints & oops happens when javac reuses local variables
+ case Int:
+ case Long:
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-AryPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, (ptr == Constant ? const_oop() : NULL), _ary, _klass, _klass_is_exact, offset);
+ case BotPTR:
+ case NotNull:
+ return TypeOopPtr::make(ptr, offset);
+ default: ShouldNotReachHere();
+ }
+ }
+
+ case AnyPtr: { // Meeting two AnyPtrs
+ // Found an AnyPtr type vs self-AryPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ return this;
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ case Null:
+ if( ptr == Null ) return TypePtr::make(AnyPtr, ptr, offset);
+ case AnyNull:
+ return make( ptr, (ptr == Constant ? const_oop() : NULL), _ary, _klass, _klass_is_exact, offset );
+ default: ShouldNotReachHere();
+ }
+ }
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case AryPtr: { // Meeting 2 references?
+ const TypeAryPtr *tap = t->is_aryptr();
+ int off = meet_offset(tap->offset());
+ const TypeAry *tary = _ary->meet(tap->_ary)->is_ary();
+ PTR ptr = meet_ptr(tap->ptr());
+ int iid = meet_instance(tap->instance_id());
+ ciKlass* lazy_klass = NULL;
+ if (tary->_elem->isa_int()) {
+ // Integral array element types have irrelevant lattice relations.
+ // It is the klass that determines array layout, not the element type.
+ if (_klass == NULL)
+ lazy_klass = tap->_klass;
+ else if (tap->_klass == NULL || tap->_klass == _klass) {
+ lazy_klass = _klass;
+ } else {
+ // Something like byte[int+] meets char[int+].
+ // This must fall to bottom, not (int[-128..65535])[int+].
+ tary = TypeAry::make(Type::BOTTOM, tary->_size);
+ }
+ }
+ bool xk;
+ switch (tap->ptr()) {
+ case AnyNull:
+ case TopPTR:
+ // Compute new klass on demand, do not use tap->_klass
+ xk = (tap->_klass_is_exact | this->_klass_is_exact);
+ return make( ptr, const_oop(), tary, lazy_klass, xk, off );
+ case Constant: {
+ ciObject* o = const_oop();
+ if( _ptr == Constant ) {
+ if( tap->const_oop() != NULL && !o->equals(tap->const_oop()) ) {
+ ptr = NotNull;
+ o = NULL;
+ }
+ } else if( above_centerline(_ptr) ) {
+ o = tap->const_oop();
+ }
+ xk = true;
+ return TypeAryPtr::make( ptr, o, tary, tap->_klass, xk, off );
+ }
+ case NotNull:
+ case BotPTR:
+ // Compute new klass on demand, do not use tap->_klass
+ if (above_centerline(this->_ptr))
+ xk = tap->_klass_is_exact;
+ else if (above_centerline(tap->_ptr))
+ xk = this->_klass_is_exact;
+ else xk = (tap->_klass_is_exact & this->_klass_is_exact) &&
+ (klass() == tap->klass()); // Only precise for identical arrays
+ return TypeAryPtr::make( ptr, NULL, tary, lazy_klass, xk, off, iid );
+ default: ShouldNotReachHere();
+ }
+ }
+
+ // All arrays inherit from Object class
+ case InstPtr: {
+ const TypeInstPtr *tp = t->is_instptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ int iid = meet_instance(tp->instance_id());
+ switch (ptr) {
+ case TopPTR:
+ case AnyNull: // Fall 'down' to dual of object klass
+ if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ return TypeAryPtr::make( ptr, _ary, _klass, _klass_is_exact, offset, iid );
+ } else {
+ // cannot subclass, so the meet has to fall badly below the centerline
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL,offset, iid);
+ }
+ case Constant:
+ case NotNull:
+ case BotPTR: // Fall down to object klass
+ // LCA is object_klass, but if we subclass from the top we can do better
+ if (above_centerline(tp->ptr())) {
+ // If 'tp' is above the centerline and it is Object class
+ // then we can subclass in the Java class hierarchy.
+ if( tp->klass()->equals(ciEnv::current()->Object_klass()) ) {
+ // that is, my array type is a subtype of 'tp' klass
+ return make( ptr, _ary, _klass, _klass_is_exact, offset, iid );
+ }
+ }
+ // The other case cannot happen, since t cannot be a subtype of an array.
+ // The meet falls down to Object class below centerline.
+ if( ptr == Constant )
+ ptr = NotNull;
+ return TypeInstPtr::make( ptr, ciEnv::current()->Object_klass(), false, NULL,offset, iid);
+ default: typerr(t);
+ }
+ }
+
+ case KlassPtr:
+ return TypeInstPtr::BOTTOM;
+
+ }
+ return this; // Lint noise
+}
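+
+// Editor's note: an illustrative, self-contained sketch (not part of the original
+// sources) of the exactness merge used in the NotNull/BotPTR arm above. An array type
+// stays exact across a meet only if both inputs were exact and named the same klass,
+// unless one input is above the centerline and therefore contributes nothing.
+// merge_exactness and its parameters are invented names.
+static bool merge_exactness(bool this_above, bool that_above,
+                            bool this_exact, bool that_exact,
+                            bool same_klass) {
+  if (this_above) return that_exact;              // vacuous left input: keep the right answer
+  if (that_above) return this_exact;              // vacuous right input: keep the left answer
+  return this_exact && that_exact && same_klass;  // only identical exact arrays stay exact
+}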
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeAryPtr::xdual() const {
+ return new TypeAryPtr( dual_ptr(), _const_oop, _ary->dual()->is_ary(),_klass, _klass_is_exact, dual_offset(), dual_instance() );
+}
+
+//------------------------------dump2------------------------------------------
+#ifndef PRODUCT
+void TypeAryPtr::dump2( Dict &d, uint depth, outputStream *st ) const {
+ _ary->dump2(d,depth,st);
+ switch( _ptr ) {
+ case Constant:
+ const_oop()->print(st);
+ break;
+ case BotPTR:
+ if (!WizardMode && !Verbose) {
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+ case TopPTR:
+ case AnyNull:
+ case NotNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ st->print("*");
+ if (_instance_id != UNKNOWN_INSTANCE)
+ st->print(",iid=%d",_instance_id);
+ if( !_offset ) return;
+ if( _offset == OffsetTop ) st->print("+undefined");
+ else if( _offset == OffsetBot ) st->print("+any");
+ else if( _offset < 12 ) st->print("+%d",_offset);
+ else st->print("[%d]", (_offset-12)/4 );
+}
+#endif
+
+bool TypeAryPtr::empty(void) const {
+ if (_ary->empty()) return true;
+ return TypeOopPtr::empty();
+}
+
+//------------------------------add_offset-------------------------------------
+const TypePtr *TypeAryPtr::add_offset( int offset ) const {
+ return make( _ptr, _const_oop, _ary, _klass, _klass_is_exact, xadd_offset(offset), _instance_id );
+}
+
+
+//=============================================================================
+// Convenience common pre-built types.
+
+// Not-null object klass or below
+const TypeKlassPtr *TypeKlassPtr::OBJECT;
+const TypeKlassPtr *TypeKlassPtr::OBJECT_OR_NULL;
+
+//------------------------------TypeKlassPtr-----------------------------------
+TypeKlassPtr::TypeKlassPtr( PTR ptr, ciKlass* klass, int offset )
+ : TypeOopPtr(KlassPtr, ptr, klass, (ptr==Constant), (ptr==Constant ? klass : NULL), offset, 0) {
+}
+
+//------------------------------make-------------------------------------------
+// ptr to klass 'k', if Constant, or possibly to a sub-klass if not a Constant
+const TypeKlassPtr *TypeKlassPtr::make( PTR ptr, ciKlass* k, int offset ) {
+ assert( k != NULL, "Expect a non-NULL klass");
+ assert(k->is_instance_klass() || k->is_array_klass() ||
+ k->is_method_klass(), "Incorrect type of klass oop");
+ TypeKlassPtr *r =
+ (TypeKlassPtr*)(new TypeKlassPtr(ptr, k, offset))->hashcons();
+
+ return r;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeKlassPtr::eq( const Type *t ) const {
+ const TypeKlassPtr *p = t->is_klassptr();
+ return
+ klass()->equals(p->klass()) &&
+ TypeOopPtr::eq(p);
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeKlassPtr::hash(void) const {
+ return klass()->hash() + TypeOopPtr::hash();
+}
+
+
+//------------------------------klass------------------------------------------
+// Return the defining klass for this class
+ciKlass* TypeAryPtr::klass() const {
+ if( _klass ) return _klass; // Return cached value, if possible
+
+ // Oops, need to compute _klass and cache it
+ ciKlass* k_ary = NULL;
+ const TypeInstPtr *tinst;
+ const TypeAryPtr *tary;
+ // Get element klass
+ if ((tinst = elem()->isa_instptr()) != NULL) {
+ // Compute array klass from element klass
+ k_ary = ciObjArrayKlass::make(tinst->klass());
+ } else if ((tary = elem()->isa_aryptr()) != NULL) {
+ // Compute array klass from element klass
+ ciKlass* k_elem = tary->klass();
+ // If element type is something like bottom[], k_elem will be null.
+ if (k_elem != NULL)
+ k_ary = ciObjArrayKlass::make(k_elem);
+ } else if ((elem()->base() == Type::Top) ||
+ (elem()->base() == Type::Bottom)) {
+ // element type of Bottom occurs from meet of basic type
+ // and object; Top occurs when doing join on Bottom.
+ // Leave k_ary at NULL.
+ } else {
+ // Cannot compute array klass directly from basic type,
+ // since subtypes of TypeInt all have basic type T_INT.
+ assert(!elem()->isa_int(),
+ "integral arrays must be pre-equipped with a class");
+ // Compute array klass directly from basic type
+ k_ary = ciTypeArrayKlass::make(elem()->basic_type());
+ }
+
+ if( this != TypeAryPtr::OOPS )
+ // The _klass field acts as a cache of the underlying
+ // ciKlass for this array type. In order to set the field,
+ // we need to cast away const-ness.
+ //
+ // IMPORTANT NOTE: we *never* set the _klass field for the
+ // type TypeAryPtr::OOPS. This Type is shared between all
+ // active compilations. However, the ciKlass which represents
+ // this Type is *not* shared between compilations, so caching
+ // this value would result in fetching a dangling pointer.
+ //
+ // Recomputing the underlying ciKlass for each request is
+ // a bit less efficient than caching, but calls to
+ // TypeAryPtr::OOPS->klass() are not common enough to matter.
+ ((TypeAryPtr*)this)->_klass = k_ary;
+ return k_ary;
+}
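+
+// Editor's note: an illustrative, standalone model (not part of the original sources)
+// of the lazy-caching pattern used by TypeAryPtr::klass() above: compute on first use,
+// cache by casting away const-ness, and never cache into the one instance that is
+// shared across compilations. LazyVal and its members are invented names.
+struct LazyVal {
+  int  _cache;                        // 0 means "not computed yet" (like _klass == NULL)
+  bool _is_shared;                    // true for the instance shared across all users
+  int  compute() const { return 42; } // stands in for building the array klass
+  int  get() const {
+    if (_cache != 0) return _cache;   // return cached value, if possible
+    int v = compute();
+    if (!_is_shared)                  // never cache into the shared instance
+      ((LazyVal*)this)->_cache = v;   // cast away const-ness, as the code above does
+    return v;                         // shared instance: recompute on every request
+  }
+};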
+
+
+//------------------------------add_offset-------------------------------------
+// Access internals of klass object
+const TypePtr *TypeKlassPtr::add_offset( int offset ) const {
+ return make( _ptr, klass(), xadd_offset(offset) );
+}
+
+//------------------------------cast_to_ptr_type-------------------------------
+const Type *TypeKlassPtr::cast_to_ptr_type(PTR ptr) const {
+ assert(_base == OopPtr, "subclass must override cast_to_ptr_type");
+ if( ptr == _ptr ) return this;
+ return make(ptr, _klass, _offset);
+}
+
+
+//-----------------------------cast_to_exactness-------------------------------
+const Type *TypeKlassPtr::cast_to_exactness(bool klass_is_exact) const {
+ if( klass_is_exact == _klass_is_exact ) return this;
+ if (!UseExactTypes) return this;
+ return make(klass_is_exact ? Constant : NotNull, _klass, _offset);
+}
+
+
+//-----------------------------as_instance_type--------------------------------
+// Corresponding type for an instance of the given class.
+// It will be NotNull, and exact if and only if the klass type is exact.
+const TypeOopPtr* TypeKlassPtr::as_instance_type() const {
+ ciKlass* k = klass();
+ bool xk = klass_is_exact();
+ //return TypeInstPtr::make(TypePtr::NotNull, k, xk, NULL, 0);
+ const TypeOopPtr* toop = TypeOopPtr::make_from_klass_raw(k);
+ toop = toop->cast_to_ptr_type(TypePtr::NotNull)->is_oopptr();
+ return toop->cast_to_exactness(xk)->is_oopptr();
+}
+
+
+//------------------------------xmeet------------------------------------------
+// Compute the MEET of two types, return a new Type object.
+const Type *TypeKlassPtr::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Pointer
+ switch (t->base()) { // switch on original type
+
+ case Int: // Mixing ints & oops happens when javac
+ case Long: // reuses local variables
+ case FloatTop:
+ case FloatCon:
+ case FloatBot:
+ case DoubleTop:
+ case DoubleCon:
+ case DoubleBot:
+ case Bottom: // Ye Olde Default
+ return Type::BOTTOM;
+ case Top:
+ return this;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case RawPtr: return TypePtr::BOTTOM;
+
+ case OopPtr: { // Meeting to OopPtrs
+ // Found an OopPtr type vs self-KlassPtr type
+ const TypePtr *tp = t->is_oopptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ case AnyNull:
+ return make(ptr, klass(), offset);
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AnyPtr: { // Meeting to AnyPtrs
+ // Found an AnyPtr type vs self-KlassPtr type
+ const TypePtr *tp = t->is_ptr();
+ int offset = meet_offset(tp->offset());
+ PTR ptr = meet_ptr(tp->ptr());
+ switch (tp->ptr()) {
+ case TopPTR:
+ return this;
+ case Null:
+ if( ptr == Null ) return TypePtr::make( AnyPtr, ptr, offset );
+ case AnyNull:
+ return make( ptr, klass(), offset );
+ case BotPTR:
+ case NotNull:
+ return TypePtr::make(AnyPtr, ptr, offset);
+ default: typerr(t);
+ }
+ }
+
+ case AryPtr: // Meet with AryPtr
+ case InstPtr: // Meet with InstPtr
+ return TypeInstPtr::BOTTOM;
+
+ //
+ // A-top }
+ // / | \ } Tops
+ // B-top A-any C-top }
+ // | / | \ | } Any-nulls
+ // B-any | C-any }
+ // | | |
+ // B-con A-con C-con } constants; not comparable across classes
+ // | | |
+ // B-not | C-not }
+ // | \ | / | } not-nulls
+ // B-bot A-not C-bot }
+ // \ | / } Bottoms
+ // A-bot }
+ //
+
+ case KlassPtr: { // Meet two KlassPtr types
+ const TypeKlassPtr *tkls = t->is_klassptr();
+ int off = meet_offset(tkls->offset());
+ PTR ptr = meet_ptr(tkls->ptr());
+
+ // Check for the easy case: the klasses are equal (and perhaps not loaded!).
+ // If we have constants, then we created oops, so the classes are loaded
+ // and we can handle the constants further down. This case handles
+ // not-loaded classes.
+ if( ptr != Constant && tkls->klass()->equals(klass()) ) {
+ return make( ptr, klass(), off );
+ }
+
+ // Classes require inspection in the Java klass hierarchy. Must be loaded.
+ ciKlass* tkls_klass = tkls->klass();
+ ciKlass* this_klass = this->klass();
+ assert( tkls_klass->is_loaded(), "This class should have been loaded.");
+ assert( this_klass->is_loaded(), "This class should have been loaded.");
+
+ // If 'this' type is above the centerline and is a superclass of the
+ // other, we can treat 'this' as having the same type as the other.
+ if ((above_centerline(this->ptr())) &&
+ tkls_klass->is_subtype_of(this_klass)) {
+ this_klass = tkls_klass;
+ }
+ // If 'tkls' type is above the centerline and is a superclass of the
+ // other, we can treat 'tkls' as having the same type as the other.
+ if ((above_centerline(tkls->ptr())) &&
+ this_klass->is_subtype_of(tkls_klass)) {
+ tkls_klass = this_klass;
+ }
+
+ // Check for classes now being equal
+ if (tkls_klass->equals(this_klass)) {
+ // If the klasses are equal, the constants may still differ. Fall to
+ // NotNull if they do (neither constant is NULL; that is a special case
+ // handled elsewhere).
+ ciObject* o = NULL; // Assume not constant when done
+ ciObject* this_oop = const_oop();
+ ciObject* tkls_oop = tkls->const_oop();
+ if( ptr == Constant ) {
+ if (this_oop != NULL && tkls_oop != NULL &&
+ this_oop->equals(tkls_oop) )
+ o = this_oop;
+ else if (above_centerline(this->ptr()))
+ o = tkls_oop;
+ else if (above_centerline(tkls->ptr()))
+ o = this_oop;
+ else
+ ptr = NotNull;
+ }
+ return make( ptr, this_klass, off );
+ } // Else classes are not equal
+
+ // Since klasses are different, we require the LCA in the Java
+ // class hierarchy - which means we have to fall to at least NotNull.
+ if( ptr == TopPTR || ptr == AnyNull || ptr == Constant )
+ ptr = NotNull;
+ // Now we find the LCA of Java classes
+ ciKlass* k = this_klass->least_common_ancestor(tkls_klass);
+ return make( ptr, k, off );
+ } // End of case KlassPtr
+
+ } // End of switch
+ return this; // Return the double constant
+}
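+
+// Editor's note: an illustrative, self-contained sketch (not part of the original
+// sources) of the least-common-ancestor fallback used above when the two klasses
+// differ: walk both up a singly-inherited hierarchy until they meet. ToyKlass and
+// toy_lca are invented; ciKlass::least_common_ancestor is analogous only in spirit,
+// and a common root (Object) is assumed so the walk terminates.
+struct ToyKlass { const ToyKlass* super; int depth; };
+static const ToyKlass* toy_lca(const ToyKlass* a, const ToyKlass* b) {
+  while (a->depth > b->depth) a = a->super;       // lift the deeper klass first
+  while (b->depth > a->depth) b = b->super;
+  while (a != b) { a = a->super; b = b->super; }  // climb in lock-step until they meet
+  return a;                                       // the meet pairs this with NotNull
+}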
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeKlassPtr::xdual() const {
+ return new TypeKlassPtr( dual_ptr(), klass(), dual_offset() );
+}
+
+//------------------------------dump2------------------------------------------
+// Dump Klass Type
+#ifndef PRODUCT
+void TypeKlassPtr::dump2( Dict & d, uint depth, outputStream *st ) const {
+ switch( _ptr ) {
+ case Constant:
+ st->print("precise ");
+ case NotNull:
+ {
+ const char *name = klass()->name()->as_utf8();
+ if( name ) {
+ st->print("klass %s: " INTPTR_FORMAT, name, klass());
+ } else {
+ ShouldNotReachHere();
+ }
+ }
+ case BotPTR:
+ if( !WizardMode && !Verbose && !_klass_is_exact ) break;
+ case TopPTR:
+ case AnyNull:
+ st->print(":%s", ptr_msg[_ptr]);
+ if( _klass_is_exact ) st->print(":exact");
+ break;
+ }
+
+ if( _offset ) { // Dump offset, if any
+ if( _offset == OffsetBot ) { st->print("+any"); }
+ else if( _offset == OffsetTop ) { st->print("+unknown"); }
+ else { st->print("+%d", _offset); }
+ }
+
+ st->print(" *");
+}
+#endif
+
+
+
+//=============================================================================
+// Convenience common pre-built types.
+
+//------------------------------make-------------------------------------------
+const TypeFunc *TypeFunc::make( const TypeTuple *domain, const TypeTuple *range ) {
+ return (TypeFunc*)(new TypeFunc(domain,range))->hashcons();
+}
+
+//------------------------------make-------------------------------------------
+const TypeFunc *TypeFunc::make(ciMethod* method) {
+ Compile* C = Compile::current();
+ const TypeFunc* tf = C->last_tf(method); // check cache
+ if (tf != NULL) return tf; // The hit rate here is almost 50%.
+ const TypeTuple *domain;
+ if (method->flags().is_static()) {
+ domain = TypeTuple::make_domain(NULL, method->signature());
+ } else {
+ domain = TypeTuple::make_domain(method->holder(), method->signature());
+ }
+ const TypeTuple *range = TypeTuple::make_range(method->signature());
+ tf = TypeFunc::make(domain, range);
+ C->set_last_tf(method, tf); // fill cache
+ return tf;
+}
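+
+// Editor's note: an illustrative, standalone sketch (not part of the original sources)
+// of the one-entry cache consulted above via last_tf / set_last_tf: remember only the
+// most recent (key, value) pair and rebuild on a miss. build_signature and
+// cached_signature are invented names.
+static int build_signature(int key) { return key * 31 + 7; }  // stands in for building a TypeFunc
+static int cached_signature(int key) {
+  static int last_key = -1;               // key of the most recent request
+  static int last_val = 0;                // value built for that key
+  if (key == last_key) return last_val;   // cache hit (roughly every other call above)
+  last_val = build_signature(key);        // miss: build and remember for next time
+  last_key = key;
+  return last_val;
+}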
+
+//------------------------------meet-------------------------------------------
+// Compute the MEET of two types. It returns a new Type object.
+const Type *TypeFunc::xmeet( const Type *t ) const {
+ // Perform a fast test for common case; meeting the same types together.
+ if( this == t ) return this; // Meeting same type-rep?
+
+ // Current "this->_base" is Func
+ switch (t->base()) { // switch on original type
+
+ case Bottom: // Ye Olde Default
+ return t;
+
+ default: // All else is a mistake
+ typerr(t);
+
+ case Top:
+ break;
+ }
+ return this; // Return the double constant
+}
+
+//------------------------------xdual------------------------------------------
+// Dual: compute field-by-field dual
+const Type *TypeFunc::xdual() const {
+ return this;
+}
+
+//------------------------------eq---------------------------------------------
+// Structural equality check for Type representations
+bool TypeFunc::eq( const Type *t ) const {
+ const TypeFunc *a = (const TypeFunc*)t;
+ return _domain == a->_domain &&
+ _range == a->_range;
+}
+
+//------------------------------hash-------------------------------------------
+// Type-specific hashing function.
+int TypeFunc::hash(void) const {
+ return (intptr_t)_domain + (intptr_t)_range;
+}
+
+//------------------------------dump2------------------------------------------
+// Dump Function Type
+#ifndef PRODUCT
+void TypeFunc::dump2( Dict &d, uint depth, outputStream *st ) const {
+ if( _range->_cnt <= Parms )
+ st->print("void");
+ else {
+ uint i;
+ for (i = Parms; i < _range->_cnt-1; i++) {
+ _range->field_at(i)->dump2(d,depth,st);
+ st->print("/");
+ }
+ _range->field_at(i)->dump2(d,depth,st);
+ }
+ st->print(" ");
+ st->print("( ");
+ if( !depth || d[this] ) { // Check for recursive dump
+ st->print("...)");
+ return;
+ }
+ d.Insert((void*)this,(void*)this); // Stop recursion
+ if (Parms < _domain->_cnt)
+ _domain->field_at(Parms)->dump2(d,depth-1,st);
+ for (uint i = Parms+1; i < _domain->_cnt; i++) {
+ st->print(", ");
+ _domain->field_at(i)->dump2(d,depth-1,st);
+ }
+ st->print(" )");
+}
+
+//------------------------------print_flattened--------------------------------
+// Print a 'flattened' signature
+static const char * const flat_type_msg[Type::lastype] = {
+ "bad","control","top","int","long","_",
+ "tuple:", "array:",
+ "ptr", "rawptr", "ptr", "ptr", "ptr", "ptr",
+ "func", "abIO", "return_address", "mem",
+ "float_top", "ftcon:", "flt",
+ "double_top", "dblcon:", "dbl",
+ "bottom"
+};
+
+void TypeFunc::print_flattened() const {
+ if( _range->_cnt <= Parms )
+ tty->print("void");
+ else {
+ uint i;
+ for (i = Parms; i < _range->_cnt-1; i++)
+ tty->print("%s/",flat_type_msg[_range->field_at(i)->base()]);
+ tty->print("%s",flat_type_msg[_range->field_at(i)->base()]);
+ }
+ tty->print(" ( ");
+ if (Parms < _domain->_cnt)
+ tty->print("%s",flat_type_msg[_domain->field_at(Parms)->base()]);
+ for (uint i = Parms+1; i < _domain->_cnt; i++)
+ tty->print(", %s",flat_type_msg[_domain->field_at(i)->base()]);
+ tty->print(" )");
+}
+#endif
+
+//------------------------------singleton--------------------------------------
+// TRUE if Type is a singleton type, FALSE otherwise. Singletons are simple
+// constants (Ldi nodes). Singletons are integer, float or double constants
+// or a single symbol.
+bool TypeFunc::singleton(void) const {
+ return false; // Never a singleton
+}
+
+bool TypeFunc::empty(void) const {
+ return false; // Never empty
+}
+
+
+BasicType TypeFunc::return_type() const {
+ if (range()->cnt() == TypeFunc::Parms) {
+ return T_VOID;
+ }
+ return range()->field_at(TypeFunc::Parms)->basic_type();
+}
diff --git a/src/share/vm/opto/type.hpp b/src/share/vm/opto/type.hpp
new file mode 100644
index 000000000..cca1e6404
--- /dev/null
+++ b/src/share/vm/opto/type.hpp
@@ -0,0 +1,1124 @@
+/*
+ * Copyright 1997-2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ *
+ */
+
+// Portions of code courtesy of Clifford Click
+
+// Optimization - Graph Style
+
+
+// This class defines a Type lattice. The lattice is used in the constant
+// propagation algorithms, and for some type-checking of the iloc code.
+// Basic types include RSD's (lower bound, upper bound, stride for integers),
+// float & double precision constants, sets of data-labels and code-labels.
+// The complete lattice is described below. Subtypes have no relationship to
+// up or down in the lattice; that is entirely determined by the behavior of
+// the MEET/JOIN functions.
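+
+// Editor's note: an illustrative, self-contained toy lattice (not part of the original
+// sources) to make the remark above concrete: four elements (TOP above two incomparable
+// constants above BOTTOM), with MEET deciding what is "up" and "down", DUAL reflecting
+// around the centerline, and JOIN derived from MEET exactly as Type::join() does below.
+// ToyType, toy_meet, toy_dual and toy_join are invented names.
+enum ToyType { TOY_TOP, TOY_CON_A, TOY_CON_B, TOY_BOTTOM };
+static ToyType toy_meet(ToyType a, ToyType b) {
+  if (a == TOY_TOP) return b;          // TOP is the identity element of MEET
+  if (b == TOY_TOP) return a;
+  if (a == b)       return a;          // meeting a type with itself changes nothing
+  return TOY_BOTTOM;                   // two different constants fall to BOTTOM
+}
+static ToyType toy_dual(ToyType t) {
+  if (t == TOY_TOP)    return TOY_BOTTOM;   // reflect the extremes around the centerline
+  if (t == TOY_BOTTOM) return TOY_TOP;
+  return t;                                 // constants sit on the centerline, self-dual
+}
+static ToyType toy_join(ToyType a, ToyType b) {
+  return toy_dual(toy_meet(toy_dual(a), toy_dual(b)));   // JOIN via dual-meet-dual
+}
+// e.g. toy_meet(TOY_CON_A, TOY_CON_B) == TOY_BOTTOM, while toy_join of the same pair
+// climbs to TOY_TOP, the least element above both constants.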
+
+class Dict;
+class Type;
+class TypeD;
+class TypeF;
+class TypeInt;
+class TypeLong;
+class TypeAry;
+class TypeTuple;
+class TypePtr;
+class TypeRawPtr;
+class TypeOopPtr;
+class TypeInstPtr;
+class TypeAryPtr;
+class TypeKlassPtr;
+
+//------------------------------Type-------------------------------------------
+// Basic Type object, represents a set of primitive Values.
+// Types are hash-cons'd into a private class dictionary, so only one of each
+// different kind of Type exists. Types are never modified after creation, so
+// all their interesting fields are constant.
+class Type {
+public:
+ enum TYPES {
+ Bad=0, // Type check
+ Control, // Control of code (not in lattice)
+ Top, // Top of the lattice
+ Int, // Integer range (lo-hi)
+ Long, // Long integer range (lo-hi)
+ Half, // Placeholder half of doubleword
+
+ Tuple, // Method signature or object layout
+ Array, // Array types
+
+ AnyPtr, // Any old raw, klass, inst, or array pointer
+ RawPtr, // Raw (non-oop) pointers
+ OopPtr, // Any and all Java heap entities
+ InstPtr, // Instance pointers (non-array objects)
+ AryPtr, // Array pointers
+ KlassPtr, // Klass pointers
+ // (Ptr order matters: See is_ptr, isa_ptr, is_oopptr, isa_oopptr.)
+
+ Function, // Function signature
+ Abio, // Abstract I/O
+ Return_Address, // Subroutine return address
+ Memory, // Abstract store
+ FloatTop, // No float value
+ FloatCon, // Floating point constant
+ FloatBot, // Any float value
+ DoubleTop, // No double value
+ DoubleCon, // Double precision constant
+ DoubleBot, // Any double value
+ Bottom, // Bottom of lattice
+ lastype // Bogus ending type (not in lattice)
+ };
+
+ // Signal values for offsets from a base pointer
+ enum OFFSET_SIGNALS {
+ OffsetTop = -2000000000, // undefined offset
+ OffsetBot = -2000000001 // any possible offset
+ };
+
+ // Min and max WIDEN values.
+ enum WIDEN {
+ WidenMin = 0,
+ WidenMax = 3
+ };
+
+private:
+ // Dictionary of types shared among compilations.
+ static Dict* _shared_type_dict;
+
+ static int uhash( const Type *const t );
+ // Structural equality check. Assumes that cmp() has already compared
+ // the _base types and thus knows it can cast 't' appropriately.
+ virtual bool eq( const Type *t ) const;
+
+ // Top-level hash-table of types
+ static Dict *type_dict() {
+ return Compile::current()->type_dict();
+ }
+
+ // DUAL operation: reflect around lattice centerline. Used instead of
+ // join to ensure my lattice is symmetric up and down. Dual is computed
+ // lazily, on demand, and cached in _dual.
+ const Type *_dual; // Cached dual value
+ // Table for efficient dualing of base types
+ static const TYPES dual_type[lastype];
+
+protected:
+ // Each class of type is also identified by its base.
+ const TYPES _base; // Enum of Types type
+
+ Type( TYPES t ) : _dual(NULL), _base(t) {} // Simple types
+ // ~Type(); // Use fast deallocation
+ const Type *hashcons(); // Hash-cons the type
+
+public:
+
+ inline void* operator new( size_t x ) {
+ Compile* compile = Compile::current();
+ compile->set_type_last_size(x);
+ void *temp = compile->type_arena()->Amalloc_D(x);
+ compile->set_type_hwm(temp);
+ return temp;
+ }
+ inline void operator delete( void* ptr ) {
+ Compile* compile = Compile::current();
+ compile->type_arena()->Afree(ptr,compile->type_last_size());
+ }
+
+ // Initialize the type system for a particular compilation.
+ static void Initialize(Compile* compile);
+
+ // Initialize the types shared by all compilations.
+ static void Initialize_shared(Compile* compile);
+
+ TYPES base() const {
+ assert(_base > Bad && _base < lastype, "sanity");
+ return _base;
+ }
+
+ // Create a new hash-consd type
+ static const Type *make(enum TYPES);
+ // Test for equivalence of types
+ static int cmp( const Type *const t1, const Type *const t2 );
+ // Test for higher or equal in lattice
+ int higher_equal( const Type *t ) const { return !cmp(meet(t),t); }
+
+ // MEET operation; lower in lattice.
+ const Type *meet( const Type *t ) const;
+ // WIDEN: 'widens' for Ints and other range types
+ virtual const Type *widen( const Type *old ) const { return this; }
+ // NARROW: complement for widen, used by pessimistic phases
+ virtual const Type *narrow( const Type *old ) const { return this; }
+
+ // DUAL operation: reflect around lattice centerline. Used instead of
+ // join to ensure my lattice is symmetric up and down.
+ const Type *dual() const { return _dual; }
+
+ // Compute meet dependent on base type
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // JOIN operation; higher in lattice. Done by finding the dual of the
+ // meet of the dual of the 2 inputs.
+ const Type *join( const Type *t ) const {
+ return dual()->meet(t->dual())->dual(); }
+
+ // Modified version of JOIN adapted to the needs of Node::Value.
+ // Normalizes all empty values to TOP. Does not kill _widen bits.
+ // Currently, it also works around limitations involving interface types.
+ virtual const Type *filter( const Type *kills ) const;
+
+ // Convenience access
+ float getf() const;
+ double getd() const;
+
+ const TypeInt *is_int() const;
+ const TypeInt *isa_int() const; // Returns NULL if not an Int
+ const TypeLong *is_long() const;
+ const TypeLong *isa_long() const; // Returns NULL if not a Long
+ const TypeD *is_double_constant() const; // Asserts it is a DoubleCon
+ const TypeD *isa_double_constant() const; // Returns NULL if not a DoubleCon
+ const TypeF *is_float_constant() const; // Asserts it is a FloatCon
+ const TypeF *isa_float_constant() const; // Returns NULL if not a FloatCon
+ const TypeTuple *is_tuple() const; // Collection of fields, NOT a pointer
+ const TypeAry *is_ary() const; // Array, NOT array pointer
+ const TypePtr *is_ptr() const; // Asserts it is a ptr type
+ const TypePtr *isa_ptr() const; // Returns NULL if not ptr type
+ const TypeRawPtr *is_rawptr() const; // NOT Java oop
+ const TypeOopPtr *isa_oopptr() const; // Returns NULL if not ptr type
+ const TypeKlassPtr *isa_klassptr() const; // Returns NULL if not KlassPtr
+ const TypeKlassPtr *is_klassptr() const; // assert if not KlassPtr
+ const TypeOopPtr *is_oopptr() const; // Java-style GC'd pointer
+ const TypeInstPtr *isa_instptr() const; // Returns NULL if not InstPtr
+ const TypeInstPtr *is_instptr() const; // Instance
+ const TypeAryPtr *isa_aryptr() const; // Returns NULL if not AryPtr
+ const TypeAryPtr *is_aryptr() const; // Array oop
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ // Special test for register pressure heuristic
+ bool is_floatingpoint() const; // True if Float or Double base type
+
+ // Do you have memory, directly or through a tuple?
+ bool has_memory( ) const;
+
+ // Are you a pointer type or not?
+ bool isa_oop_ptr() const;
+
+ // TRUE if type is a singleton
+ virtual bool singleton(void) const;
+
+ // TRUE if type is above the lattice centerline, and is therefore vacuous
+ virtual bool empty(void) const;
+
+ // Return a hash for this type. The hash function is public so ConNode
+ // (constants) can hash on their constant, which is represented by a Type.
+ virtual int hash() const;
+
+ // Map ideal registers (machine types) to ideal types
+ static const Type *mreg2type[];
+
+ // Printing, statistics
+ static const char * const msg[lastype]; // Printable strings
+#ifndef PRODUCT
+ void dump_on(outputStream *st) const;
+ void dump() const {
+ dump_on(tty);
+ }
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+ static void dump_stats();
+ static void verify_lastype(); // Check that arrays match type enum
+#endif
+ void typerr(const Type *t) const; // Mixing types error
+
+ // Create basic type
+ static const Type* get_const_basic_type(BasicType type) {
+ assert((uint)type <= T_CONFLICT && _const_basic_type[type] != NULL, "bad type");
+ return _const_basic_type[type];
+ }
+
+ // Mapping to the array element's basic type.
+ BasicType array_element_basic_type() const;
+
+ // Create standard type for a ciType:
+ static const Type* get_const_type(ciType* type);
+
+ // Create standard zero value:
+ static const Type* get_zero_type(BasicType type) {
+ assert((uint)type <= T_CONFLICT && _zero_type[type] != NULL, "bad type");
+ return _zero_type[type];
+ }
+
+ // Report if this is a zero value (not top).
+ bool is_zero_type() const {
+ BasicType type = basic_type();
+ if (type == T_VOID || type >= T_CONFLICT)
+ return false;
+ else
+ return (this == _zero_type[type]);
+ }
+
+ // Convenience common pre-built types.
+ static const Type *ABIO;
+ static const Type *BOTTOM;
+ static const Type *CONTROL;
+ static const Type *DOUBLE;
+ static const Type *FLOAT;
+ static const Type *HALF;
+ static const Type *MEMORY;
+ static const Type *MULTI;
+ static const Type *RETURN_ADDRESS;
+ static const Type *TOP;
+
+ // Mapping from compiler type to VM BasicType
+ BasicType basic_type() const { return _basic_type[_base]; }
+
+ // Mapping from CI type system to compiler type:
+ static const Type* get_typeflow_type(ciType* type);
+
+private:
+ // support arrays
+ static const BasicType _basic_type[];
+ static const Type* _zero_type[T_CONFLICT+1];
+ static const Type* _const_basic_type[T_CONFLICT+1];
+};
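+
+// Editor's note: an illustrative, self-contained model (not part of the original
+// sources) of hash-consing as performed by Type::hashcons() above: every structurally
+// equal value is canonicalized to one shared object, so pointer equality afterwards
+// implies structural equality. Interned, intern() and the tiny fixed-size table are
+// invented; HotSpot uses its own Dict keyed by hash() and eq().
+struct Interned { int payload; const Interned* next; };
+static const Interned* intern(int payload) {
+  static const Interned* buckets[31];               // tiny open hash table, keyed by value
+  unsigned idx = (unsigned)payload % 31;
+  for (const Interned* p = buckets[idx]; p != 0; p = p->next)
+    if (p->payload == payload) return p;            // structural hit: reuse the old object
+  Interned* n = new Interned();                     // miss: this becomes the canonical copy
+  n->payload = payload;
+  n->next    = buckets[idx];
+  buckets[idx] = n;
+  return n;
+}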
+
+//------------------------------TypeF------------------------------------------
+// Class of Float-Constant Types.
+class TypeF : public Type {
+ TypeF( float f ) : Type(FloatCon), _f(f) {};
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const float _f; // Float constant
+
+ static const TypeF *make(float f);
+
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeF *ZERO; // positive zero only
+ static const TypeF *ONE;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeD------------------------------------------
+// Class of Double-Constant Types.
+class TypeD : public Type {
+ TypeD( double d ) : Type(DoubleCon), _d(d) {};
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const double _d; // Double constant
+
+ static const TypeD *make(double d);
+
+ virtual bool is_finite() const; // Has a finite value
+ virtual bool is_nan() const; // Is not a number (NaN)
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeD *ZERO; // positive zero only
+ static const TypeD *ONE;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeInt----------------------------------------
+// Class of integer ranges, the set of integers between a lower bound and an
+// upper bound, inclusive.
+class TypeInt : public Type {
+ TypeInt( jint lo, jint hi, int w );
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const jint _lo, _hi; // Lower bound, upper bound
+ const short _widen; // Limit on times we widen this sucker
+
+ static const TypeInt *make(jint lo);
+ // must always specify w
+ static const TypeInt *make(jint lo, jint hi, int w);
+
+ // Check for single integer
+ int is_con() const { return _lo==_hi; }
+ bool is_con(int i) const { return is_con() && _lo == i; }
+ jint get_con() const { assert( is_con(), "" ); return _lo; }
+
+ virtual bool is_finite() const; // Has a finite value
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ virtual const Type *widen( const Type *t ) const;
+ virtual const Type *narrow( const Type *t ) const;
+ // Do not kill _widen bits.
+ virtual const Type *filter( const Type *kills ) const;
+ // Convenience common pre-built types.
+ static const TypeInt *MINUS_1;
+ static const TypeInt *ZERO;
+ static const TypeInt *ONE;
+ static const TypeInt *BOOL;
+ static const TypeInt *CC;
+ static const TypeInt *CC_LT; // [-1] == MINUS_1
+ static const TypeInt *CC_GT; // [1] == ONE
+ static const TypeInt *CC_EQ; // [0] == ZERO
+ static const TypeInt *CC_LE; // [-1,0]
+ static const TypeInt *CC_GE; // [0,1] == BOOL (!)
+ static const TypeInt *BYTE;
+ static const TypeInt *CHAR;
+ static const TypeInt *SHORT;
+ static const TypeInt *POS;
+ static const TypeInt *POS1;
+ static const TypeInt *INT;
+ static const TypeInt *SYMINT; // symmetric range [-max_jint..max_jint]
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+
+//------------------------------TypeLong---------------------------------------
+// Class of long integer ranges, the set of integers between a lower bound and
+// an upper bound, inclusive.
+class TypeLong : public Type {
+ TypeLong( jlong lo, jlong hi, int w );
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ const jlong _lo, _hi; // Lower bound, upper bound
+ const short _widen; // Limit on times we widen this sucker
+
+ static const TypeLong *make(jlong lo);
+ // must always specify w
+ static const TypeLong *make(jlong lo, jlong hi, int w);
+
+ // Check for single integer
+ int is_con() const { return _lo==_hi; }
+ jlong get_con() const { assert( is_con(), "" ); return _lo; }
+
+ virtual bool is_finite() const; // Has a finite value
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ virtual const Type *widen( const Type *t ) const;
+ virtual const Type *narrow( const Type *t ) const;
+ // Do not kill _widen bits.
+ virtual const Type *filter( const Type *kills ) const;
+ // Convenience common pre-built types.
+ static const TypeLong *MINUS_1;
+ static const TypeLong *ZERO;
+ static const TypeLong *ONE;
+ static const TypeLong *POS;
+ static const TypeLong *LONG;
+ static const TypeLong *INT; // 32-bit subrange [min_jint..max_jint]
+ static const TypeLong *UINT; // 32-bit unsigned [0..max_juint]
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const;// Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypeTuple--------------------------------------
+// Class of Tuple Types, essentially type collections for function signatures
+// and class layouts. It happens to also be a fast cache for the HotSpot
+// signature types.
+class TypeTuple : public Type {
+ TypeTuple( uint cnt, const Type **fields ) : Type(Tuple), _cnt(cnt), _fields(fields) { }
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+public:
+ const uint _cnt; // Count of fields
+ const Type ** const _fields; // Array of field types
+
+ // Accessors:
+ uint cnt() const { return _cnt; }
+ const Type* field_at(uint i) const {
+ assert(i < _cnt, "oob");
+ return _fields[i];
+ }
+ void set_field_at(uint i, const Type* t) {
+ assert(i < _cnt, "oob");
+ _fields[i] = t;
+ }
+
+ static const TypeTuple *make( uint cnt, const Type **fields );
+ static const TypeTuple *make_range(ciSignature *sig);
+ static const TypeTuple *make_domain(ciInstanceKlass* recv, ciSignature *sig);
+
+ // Subroutine call type with space allocated for argument types
+ static const Type **fields( uint arg_cnt );
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeTuple *IFBOTH;
+ static const TypeTuple *IFFALSE;
+ static const TypeTuple *IFTRUE;
+ static const TypeTuple *IFNEITHER;
+ static const TypeTuple *LOOPBODY;
+ static const TypeTuple *MEMBAR;
+ static const TypeTuple *STORECONDITIONAL;
+ static const TypeTuple *START_I2C;
+ static const TypeTuple *INT_PAIR;
+ static const TypeTuple *LONG_PAIR;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypeAry----------------------------------------
+// Class of Array Types
+class TypeAry : public Type {
+ TypeAry( const Type *elem, const TypeInt *size) : Type(Array),
+ _elem(elem), _size(size) {}
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+
+private:
+ const Type *_elem; // Element type of array
+ const TypeInt *_size; // Elements in array
+ friend class TypeAryPtr;
+
+public:
+ static const TypeAry *make( const Type *elem, const TypeInt *size);
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ bool ary_must_be_exact() const; // true if arrays of such are never generic
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
+
+//------------------------------TypePtr----------------------------------------
+// Class of machine Pointer Types: raw data, instances or arrays.
+// If the _base enum is AnyPtr, then this refers to all of the above.
+// Otherwise the _base will indicate which subset of pointers is affected,
+// and the class will be inherited from.
+class TypePtr : public Type {
+public:
+ enum PTR { TopPTR, AnyNull, Constant, Null, NotNull, BotPTR, lastPTR };
+protected:
+ TypePtr( TYPES t, PTR ptr, int offset ) : Type(t), _ptr(ptr), _offset(offset) {}
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ static const PTR ptr_meet[lastPTR][lastPTR];
+ static const PTR ptr_dual[lastPTR];
+ static const char * const ptr_msg[lastPTR];
+
+public:
+ const int _offset; // Offset into oop, with TOP & BOT
+ const PTR _ptr; // Pointer equivalence class
+
+ const int offset() const { return _offset; }
+ const PTR ptr() const { return _ptr; }
+
+ static const TypePtr *make( TYPES t, PTR ptr, int offset );
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual intptr_t get_con() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+ virtual const Type *xmeet( const Type *t ) const;
+ int meet_offset( int offset ) const;
+ int dual_offset( ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // meet, dual and join over pointer equivalence sets
+ PTR meet_ptr( const PTR in_ptr ) const { return ptr_meet[in_ptr][ptr()]; }
+ PTR dual_ptr() const { return ptr_dual[ptr()]; }
+
+ // This is textually confusing unless one recalls that
+ // join(t) == dual()->meet(t->dual())->dual().
+ PTR join_ptr( const PTR in_ptr ) const {
+ return ptr_dual[ ptr_meet[ ptr_dual[in_ptr] ] [ dual_ptr() ] ];
+ }
+
+ // Tests for relation to centerline of type lattice:
+ static bool above_centerline(PTR ptr) { return (ptr <= AnyNull); }
+ static bool below_centerline(PTR ptr) { return (ptr >= NotNull); }
+ // Convenience common pre-built types.
+ static const TypePtr *NULL_PTR;
+ static const TypePtr *NOTNULL;
+ static const TypePtr *BOTTOM;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
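+
+// Editor's note: an illustrative, reduced pointer lattice (not part of the original
+// sources) showing the table-driven MEET and the centerline tests used by TypePtr
+// above. Only three of the six real states are modelled (MINI_TOP ~ TopPTR,
+// MINI_NOTNULL ~ NotNull, MINI_BOT ~ BotPTR); in the full lattice TopPTR and AnyNull
+// are above the centerline, Constant and Null sit on it, NotNull and BotPTR are below.
+// All Mini* names are invented.
+enum MiniPTR { MINI_TOP, MINI_NOTNULL, MINI_BOT, lastMiniPTR };
+static const MiniPTR mini_ptr_meet[lastMiniPTR][lastMiniPTR] = {
+  // meeting with:    MINI_TOP      MINI_NOTNULL  MINI_BOT
+  /* MINI_TOP     */{ MINI_TOP,     MINI_NOTNULL, MINI_BOT },
+  /* MINI_NOTNULL */{ MINI_NOTNULL, MINI_NOTNULL, MINI_BOT },
+  /* MINI_BOT     */{ MINI_BOT,     MINI_BOT,     MINI_BOT },
+};
+static MiniPTR mini_meet_ptr(MiniPTR a, MiniPTR b) { return mini_ptr_meet[a][b]; }
+static bool    mini_above_centerline(MiniPTR p)    { return p == MINI_TOP; }
+// e.g. mini_meet_ptr(MINI_TOP, MINI_NOTNULL) == MINI_NOTNULL: TOP is the identity,
+// and every other combination falls toward MINI_BOT, just as meet_ptr() falls to BotPTR.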
+
+//------------------------------TypeRawPtr-------------------------------------
+// Class of raw pointers, pointers to things other than Oops. Examples
+// include the stack pointer, top of heap, card-marking area, handles, etc.
+class TypeRawPtr : public TypePtr {
+protected:
+ TypeRawPtr( PTR ptr, address bits ) : TypePtr(RawPtr,ptr,0), _bits(bits){}
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+ const address _bits; // Constant value, if applicable
+
+ static const TypeRawPtr *make( PTR ptr );
+ static const TypeRawPtr *make( address bits );
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual intptr_t get_con() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+ // Convenience common pre-built types.
+ static const TypeRawPtr *BOTTOM;
+ static const TypeRawPtr *NOTNULL;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeOopPtr-------------------------------------
+// Some kind of oop (Java pointer), either klass or instance or array.
+class TypeOopPtr : public TypePtr {
+protected:
+ TypeOopPtr( TYPES t, PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id ) : TypePtr(t, ptr, offset), _const_oop(o), _klass(k), _klass_is_exact(xk), _instance_id(instance_id) { }
+public:
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ enum {
+ UNKNOWN_INSTANCE = 0
+ };
+protected:
+
+ int xadd_offset( int offset ) const;
+ // Oop is NULL, unless this is a constant oop.
+ ciObject* _const_oop; // Constant oop
+ // If _klass is NULL, then so is _sig. This is an unloaded klass.
+ ciKlass* _klass; // Klass object
+ // Does the type exclude subclasses of the klass? (Inexact == polymorphic.)
+ bool _klass_is_exact;
+
+ int _instance_id; // if not UNKNOWN_INSTANCE, indicates that this is a particular instance
+ // of this type which is distinct. This is the node index of the
+ // node creating this instance.
+
+ static const TypeOopPtr* make_from_klass_common(ciKlass* klass, bool klass_change, bool try_for_exact);
+
+ int dual_instance() const { return -_instance_id; }
+ int meet_instance(int uid) const;
+
+public:
+ // Creates a type given a klass. Correctly handles multi-dimensional arrays
+ // Respects UseUniqueSubclasses.
+ // If the klass is final, the resulting type will be exact.
+ static const TypeOopPtr* make_from_klass(ciKlass* klass) {
+ return make_from_klass_common(klass, true, false);
+ }
+ // Same as before, but will produce an exact type, even if
+ // the klass is not final, as long as it has exactly one implementation.
+ static const TypeOopPtr* make_from_klass_unique(ciKlass* klass) {
+ return make_from_klass_common(klass, true, true);
+ }
+ // Same as before, but does not respect UseUniqueSubclasses.
+ // Use this only for creating array element types.
+ static const TypeOopPtr* make_from_klass_raw(ciKlass* klass) {
+ return make_from_klass_common(klass, false, false);
+ }
+ // Creates a singleton type given an object.
+ static const TypeOopPtr* make_from_constant(ciObject* o);
+
+ // Make a generic (unclassed) pointer to an oop.
+ static const TypeOopPtr* make(PTR ptr, int offset);
+
+ ciObject* const_oop() const { return _const_oop; }
+ virtual ciKlass* klass() const { return _klass; }
+ bool klass_is_exact() const { return _klass_is_exact; }
+ bool is_instance() const { return _instance_id != UNKNOWN_INSTANCE; }
+ uint instance_id() const { return _instance_id; }
+
+ virtual intptr_t get_con() const;
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ // corresponding pointer to klass, for a given instance
+ const TypeKlassPtr* as_klass_type() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Do not allow interface-vs.-noninterface joins to collapse to top.
+ virtual const Type *filter( const Type *kills ) const;
+
+ // Convenience common pre-built type.
+ static const TypeOopPtr *BOTTOM;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const;
+#endif
+};
+
+//------------------------------TypeInstPtr------------------------------------
+// Class of Java object pointers, pointing either to non-array Java instances
+// or to a klassOop (including array klasses).
+class TypeInstPtr : public TypeOopPtr {
+ TypeInstPtr( PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id );
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+ ciSymbol* _name; // class name
+
+ public:
+ ciSymbol* name() const { return _name; }
+
+ bool is_loaded() const { return _klass->is_loaded(); }
+
+ // Make a pointer to a constant oop.
+ static const TypeInstPtr *make(ciObject* o) {
+ return make(TypePtr::Constant, o->klass(), true, o, 0);
+ }
+
+ // Make a pointer to a constant oop with offset.
+ static const TypeInstPtr *make(ciObject* o, int offset) {
+ return make(TypePtr::Constant, o->klass(), true, o, offset);
+ }
+
+ // Make a pointer to some value of type klass.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* klass) {
+ return make(ptr, klass, false, NULL, 0);
+ }
+
+ // Make a pointer to some non-polymorphic value of exactly type klass.
+ static const TypeInstPtr *make_exact(PTR ptr, ciKlass* klass) {
+ return make(ptr, klass, true, NULL, 0);
+ }
+
+ // Make a pointer to some value of type klass with offset.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* klass, int offset) {
+ return make(ptr, klass, false, NULL, offset);
+ }
+
+ // Make a pointer to an oop.
+ static const TypeInstPtr *make(PTR ptr, ciKlass* k, bool xk, ciObject* o, int offset, int instance_id = 0 );
+
+ // If this is a java.lang.Class constant, return the type for it or NULL.
+ // Pass to Type::get_const_type to turn it to a type, which will usually
+ // be a TypeInstPtr, but may also be a TypeInt::INT for int.class, etc.
+ ciType* java_mirror_type() const;
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const TypeInstPtr *xmeet_unloaded( const TypeInstPtr *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeInstPtr *NOTNULL;
+ static const TypeInstPtr *BOTTOM;
+ static const TypeInstPtr *MIRROR;
+ static const TypeInstPtr *MARK;
+ static const TypeInstPtr *KLASS;
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
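
The make() overloads above cover the common cases; roughly, with hypothetical `obj`/`klass` inputs and the public meet() entry point declared earlier in this header:

  const Type* inst_ptr_examples(ciObject* obj, ciKlass* klass) {
    const TypeInstPtr* t_con   = TypeInstPtr::make(obj);                           // constant oop, offset 0
    const TypeInstPtr* t_some  = TypeInstPtr::make(TypePtr::NotNull, klass);       // some instance of klass or a subclass
    const TypeInstPtr* t_exact = TypeInstPtr::make_exact(TypePtr::NotNull, klass); // exactly klass, no subclasses
    return t_exact->meet(t_con->meet(t_some));                                     // lattice meets of the three
  }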
+
+//------------------------------TypeAryPtr-------------------------------------
+// Class of Java array pointers
+class TypeAryPtr : public TypeOopPtr {
+ TypeAryPtr( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id ) : TypeOopPtr(AryPtr,ptr,k,xk,o,offset, instance_id), _ary(ary) {};
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ const TypeAry *_ary; // Array we point into
+
+public:
+ // Accessors
+ ciKlass* klass() const;
+ const TypeAry* ary() const { return _ary; }
+ const Type* elem() const { return _ary->_elem; }
+ const TypeInt* size() const { return _ary->_size; }
+
+ static const TypeAryPtr *make( PTR ptr, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = 0);
+ // Constant pointer to array
+ static const TypeAryPtr *make( PTR ptr, ciObject* o, const TypeAry *ary, ciKlass* k, bool xk, int offset, int instance_id = 0);
+
+ // Convenience
+ static const TypeAryPtr *make(ciObject* o);
+
+ // Return a 'ptr' version of this type
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ virtual const TypeOopPtr *cast_to_instance(int instance_id) const;
+
+ virtual const TypeAryPtr* cast_to_size(const TypeInt* size) const;
+
+ virtual bool empty(void) const; // TRUE if type is vacuous
+ virtual const TypePtr *add_offset( int offset ) const;
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeAryPtr *RANGE;
+ static const TypeAryPtr *OOPS;
+ static const TypeAryPtr *BYTES;
+ static const TypeAryPtr *SHORTS;
+ static const TypeAryPtr *CHARS;
+ static const TypeAryPtr *INTS;
+ static const TypeAryPtr *LONGS;
+ static const TypeAryPtr *FLOATS;
+ static const TypeAryPtr *DOUBLES;
+ // selects one of the above:
+ static const TypeAryPtr *get_array_body_type(BasicType elem) {
+ assert((uint)elem <= T_CONFLICT && _array_body_type[elem] != NULL, "bad elem type");
+ return _array_body_type[elem];
+ }
+ static const TypeAryPtr *_array_body_type[T_CONFLICT+1];
+ // sharpen the type of an int which is used as an array size
+ static const TypeInt* narrow_size_type(const TypeInt* size, BasicType elem);
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
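
get_array_body_type() is the usual way to pick one of the pre-built array body types; a minimal sketch:

  const TypeInt* int_array_size_type() {
    const TypeAryPtr* body = TypeAryPtr::get_array_body_type(T_INT);  // same table entry as TypeAryPtr::INTS
    return body->size();                                              // TypeInt bounds on the array length
  }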
+
+//------------------------------TypeKlassPtr-----------------------------------
+// Class of Java Klass pointers
+class TypeKlassPtr : public TypeOopPtr {
+ TypeKlassPtr( PTR ptr, ciKlass* klass, int offset );
+
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+
+public:
+ ciSymbol* name() const { return _klass->name(); }
+
+ // ptr to klass 'k'
+ static const TypeKlassPtr *make( ciKlass* k ) { return make( TypePtr::Constant, k, 0); }
+ // ptr to klass 'k' with offset
+ static const TypeKlassPtr *make( ciKlass* k, int offset ) { return make( TypePtr::Constant, k, offset); }
+ // ptr to klass 'k' or sub-klass
+ static const TypeKlassPtr *make( PTR ptr, ciKlass* k, int offset);
+
+ virtual const Type *cast_to_ptr_type(PTR ptr) const;
+
+ virtual const Type *cast_to_exactness(bool klass_is_exact) const;
+
+ // corresponding pointer to instance, for a given class
+ const TypeOopPtr* as_instance_type() const;
+
+ virtual const TypePtr *add_offset( int offset ) const;
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ // Convenience common pre-built types.
+ static const TypeKlassPtr* OBJECT; // Not-null object klass or below
+ static const TypeKlassPtr* OBJECT_OR_NULL; // Maybe-null version of same
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+#endif
+};
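
as_instance_type() here and as_klass_type() on TypeOopPtr convert between the two views of a class; a sketch with a hypothetical ciKlass `k`:

  const TypeOopPtr* instances_of(ciKlass* k) {
    const TypeKlassPtr* kp = TypeKlassPtr::make(k);   // constant pointer to the klass itself
    return kp->as_instance_type();                    // type of instances of that klass
  }                                                   // as_klass_type() goes the other way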
+
+//------------------------------TypeFunc---------------------------------------
+// Class of function types (call signatures: domain tuple to range tuple)
+class TypeFunc : public Type {
+ TypeFunc( const TypeTuple *domain, const TypeTuple *range ) : Type(Function), _domain(domain), _range(range) {}
+ virtual bool eq( const Type *t ) const;
+ virtual int hash() const; // Type specific hashing
+ virtual bool singleton(void) const; // TRUE if type is a singleton
+ virtual bool empty(void) const; // TRUE if type is vacuous
+public:
+ // Constants are shared among ADLC and VM
+ enum { Control = AdlcVMDeps::Control,
+ I_O = AdlcVMDeps::I_O,
+ Memory = AdlcVMDeps::Memory,
+ FramePtr = AdlcVMDeps::FramePtr,
+ ReturnAdr = AdlcVMDeps::ReturnAdr,
+ Parms = AdlcVMDeps::Parms
+ };
+
+ const TypeTuple* const _domain; // Domain of inputs
+ const TypeTuple* const _range; // Range of results
+
+ // Accessors:
+ const TypeTuple* domain() const { return _domain; }
+ const TypeTuple* range() const { return _range; }
+
+ static const TypeFunc *make(ciMethod* method);
+ static const TypeFunc *make(ciSignature signature, const Type* extra);
+ static const TypeFunc *make(const TypeTuple* domain, const TypeTuple* range);
+
+ virtual const Type *xmeet( const Type *t ) const;
+ virtual const Type *xdual() const; // Compute dual right now.
+
+ BasicType return_type() const;
+
+#ifndef PRODUCT
+ virtual void dump2( Dict &d, uint depth, outputStream *st ) const; // Specialized per-Type dumping
+ void print_flattened() const; // Print a 'flattened' signature
+#endif
+ // Convenience common pre-built types.
+};
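
A TypeFunc describes a call signature whose first Parms domain slots are the fixed control/I_O/memory/frame/return-address inputs shared with the ADLC. A rough sketch; `m` is a hypothetical ciMethod and cnt() is the TypeTuple accessor declared earlier in this header:

  BasicType signature_info(ciMethod* m, uint* arg_cnt) {
    const TypeFunc* tf = TypeFunc::make(m);
    *arg_cnt = tf->domain()->cnt() - TypeFunc::Parms;   // user-visible arguments follow the fixed slots
    return tf->return_type();                           // T_VOID, T_INT, T_OBJECT, ...
  }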
+
+//------------------------------accessors--------------------------------------
+inline float Type::getf() const {
+ assert( _base == FloatCon, "Not a FloatCon" );
+ return ((TypeF*)this)->_f;
+}
+
+inline double Type::getd() const {
+ assert( _base == DoubleCon, "Not a DoubleCon" );
+ return ((TypeD*)this)->_d;
+}
+
+inline const TypeF *Type::is_float_constant() const {
+ assert( _base == FloatCon, "Not a Float" );
+ return (TypeF*)this;
+}
+
+inline const TypeF *Type::isa_float_constant() const {
+ return ( _base == FloatCon ? (TypeF*)this : NULL);
+}
+
+inline const TypeD *Type::is_double_constant() const {
+ assert( _base == DoubleCon, "Not a Double" );
+ return (TypeD*)this;
+}
+
+inline const TypeD *Type::isa_double_constant() const {
+ return ( _base == DoubleCon ? (TypeD*)this : NULL);
+}
+
+inline const TypeInt *Type::is_int() const {
+ assert( _base == Int, "Not an Int" );
+ return (TypeInt*)this;
+}
+
+inline const TypeInt *Type::isa_int() const {
+ return ( _base == Int ? (TypeInt*)this : NULL);
+}
+
+inline const TypeLong *Type::is_long() const {
+ assert( _base == Long, "Not a Long" );
+ return (TypeLong*)this;
+}
+
+inline const TypeLong *Type::isa_long() const {
+ return ( _base == Long ? (TypeLong*)this : NULL);
+}
+
+inline const TypeTuple *Type::is_tuple() const {
+ assert( _base == Tuple, "Not a Tuple" );
+ return (TypeTuple*)this;
+}
+
+inline const TypeAry *Type::is_ary() const {
+ assert( _base == Array , "Not an Array" );
+ return (TypeAry*)this;
+}
+
+inline const TypePtr *Type::is_ptr() const {
+ // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
+ assert(_base >= AnyPtr && _base <= KlassPtr, "Not a pointer");
+ return (TypePtr*)this;
+}
+
+inline const TypePtr *Type::isa_ptr() const {
+ // AnyPtr is the first Ptr and KlassPtr the last, with no non-ptrs between.
+ return (_base >= AnyPtr && _base <= KlassPtr) ? (TypePtr*)this : NULL;
+}
+
+inline const TypeOopPtr *Type::is_oopptr() const {
+ // OopPtr is the first and KlassPtr the last, with no non-oops between.
+ assert(_base >= OopPtr && _base <= KlassPtr, "Not a Java pointer" ) ;
+ return (TypeOopPtr*)this;
+}
+
+inline const TypeOopPtr *Type::isa_oopptr() const {
+ // OopPtr is the first and KlassPtr the last, with no non-oops between.
+ return (_base >= OopPtr && _base <= KlassPtr) ? (TypeOopPtr*)this : NULL;
+}
+
+inline const TypeRawPtr *Type::is_rawptr() const {
+ assert( _base == RawPtr, "Not a raw pointer" );
+ return (TypeRawPtr*)this;
+}
+
+inline const TypeInstPtr *Type::isa_instptr() const {
+ return (_base == InstPtr) ? (TypeInstPtr*)this : NULL;
+}
+
+inline const TypeInstPtr *Type::is_instptr() const {
+ assert( _base == InstPtr, "Not an object pointer" );
+ return (TypeInstPtr*)this;
+}
+
+inline const TypeAryPtr *Type::isa_aryptr() const {
+ return (_base == AryPtr) ? (TypeAryPtr*)this : NULL;
+}
+
+inline const TypeAryPtr *Type::is_aryptr() const {
+ assert( _base == AryPtr, "Not an array pointer" );
+ return (TypeAryPtr*)this;
+}
+
+inline const TypeKlassPtr *Type::isa_klassptr() const {
+ return (_base == KlassPtr) ? (TypeKlassPtr*)this : NULL;
+}
+
+inline const TypeKlassPtr *Type::is_klassptr() const {
+ assert( _base == KlassPtr, "Not a klass pointer" );
+ return (TypeKlassPtr*)this;
+}
+
+inline bool Type::is_floatingpoint() const {
+ if( (_base == FloatCon) || (_base == FloatBot) ||
+ (_base == DoubleCon) || (_base == DoubleBot) )
+ return true;
+ return false;
+}
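
The accessors above follow one convention: is_xxx() asserts and downcasts, while isa_xxx() returns NULL on a mismatch and so doubles as the test. A small sketch (the _lo field comes from the TypeInt declaration earlier in this header):

  int lower_bound_or_zero(const Type* t) {
    if (const TypeInt* ti = t->isa_int()) {   // NULL if t is not an Int
      return ti->_lo;                         // safe: ti is known to be a TypeInt
    }
    return 0;                                 // t->is_int() here would assert instead
  }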
+
+
+// ===============================================================
+// Things that need to be 64-bits in the 64-bit build but
+// 32-bits in the 32-bit build. Done this way to get full
+// optimization AND strong typing.
+#ifdef _LP64
+
+// For type queries and asserts
+#define is_intptr_t is_long
+#define isa_intptr_t isa_long
+#define find_intptr_t_type find_long_type
+#define find_intptr_t_con find_long_con
+#define TypeX TypeLong
+#define Type_X Type::Long
+#define TypeX_X TypeLong::LONG
+#define TypeX_ZERO TypeLong::ZERO
+// For 'ideal_reg' machine registers
+#define Op_RegX Op_RegL
+// For phase->intcon variants
+#define MakeConX longcon
+#define ConXNode ConLNode
+// For array index arithmetic
+#define MulXNode MulLNode
+#define AndXNode AndLNode
+#define OrXNode OrLNode
+#define CmpXNode CmpLNode
+#define SubXNode SubLNode
+#define LShiftXNode LShiftLNode
+// For object size computation:
+#define AddXNode AddLNode
+// For card marks and hashcodes
+#define URShiftXNode URShiftLNode
+// Opcodes
+#define Op_LShiftX Op_LShiftL
+#define Op_AndX Op_AndL
+#define Op_AddX Op_AddL
+#define Op_SubX Op_SubL
+// conversions
+#define ConvI2X(x) ConvI2L(x)
+#define ConvL2X(x) (x)
+#define ConvX2I(x) ConvL2I(x)
+#define ConvX2L(x) (x)
+
+#else
+
+// For type queries and asserts
+#define is_intptr_t is_int
+#define isa_intptr_t isa_int
+#define find_intptr_t_type find_int_type
+#define find_intptr_t_con find_int_con
+#define TypeX TypeInt
+#define Type_X Type::Int
+#define TypeX_X TypeInt::INT
+#define TypeX_ZERO TypeInt::ZERO
+// For 'ideal_reg' machine registers
+#define Op_RegX Op_RegI
+// For phase->intcon variants
+#define MakeConX intcon
+#define ConXNode ConINode
+// For array index arithmetic
+#define MulXNode MulINode
+#define AndXNode AndINode
+#define OrXNode OrINode
+#define CmpXNode CmpINode
+#define SubXNode SubINode
+#define LShiftXNode LShiftINode
+// For object size computation:
+#define AddXNode AddINode
+// For card marks and hashcodes
+#define URShiftXNode URShiftINode
+// Opcodes
+#define Op_LShiftX Op_LShiftI
+#define Op_AndX Op_AndI
+#define Op_AddX Op_AddI
+#define Op_SubX Op_SubI
+// conversions
+#define ConvI2X(x) (x)
+#define ConvL2X(x) ConvL2I(x)
+#define ConvX2I(x) (x)
+#define ConvX2L(x) ConvI2L(x)
+
+#endif
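
These X-suffixed names let shared code be written once for both word sizes; under _LP64 they expand to the long flavors, otherwise to the int flavors. A minimal sketch, assuming a PhaseGVN-style `phase` that provides the intcon()/longcon() factories referred to above:

  Node* pointer_width_constant(PhaseGVN* phase, intptr_t value) {
    return phase->MakeConX(value);   // longcon() -> ConLNode on 64-bit, intcon() -> ConINode on 32-bit
  }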
diff --git a/src/share/vm/opto/vectornode.cpp b/src/share/vm/opto/vectornode.cpp
new file mode 100644
index 000000000..f13751605
--- /dev/null
+++ b/src/share/vm/opto/vectornode.cpp
@@ -0,0 +1,478 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+#include "incls/_precompiled.incl"
+#include "incls/_vectornode.cpp.incl"
+
+//------------------------------VectorNode--------------------------------------
+
+// Return vector type for an element type and vector length.
+const Type* VectorNode::vect_type(BasicType elt_bt, uint len) {
+ assert(len <= VectorNode::max_vlen(elt_bt), "len in range");
+ switch(elt_bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ switch(len) {
+ case 2: return TypeInt::CHAR;
+ case 4: return TypeInt::INT;
+ case 8: return TypeLong::LONG;
+ }
+ break;
+ case T_CHAR:
+ case T_SHORT:
+ switch(len) {
+ case 2: return TypeInt::INT;
+ case 4: return TypeLong::LONG;
+ }
+ break;
+ case T_INT:
+ switch(len) {
+ case 2: return TypeLong::LONG;
+ }
+ break;
+ case T_LONG:
+ break;
+ case T_FLOAT:
+ switch(len) {
+ case 2: return Type::DOUBLE;
+ }
+ break;
+ case T_DOUBLE:
+ break;
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Scalar promotion
+VectorNode* VectorNode::scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ assert(vlen <= VectorNode::max_vlen(bt), "vlen in range");
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ if (vlen == 16) return new (C, 2) Replicate16BNode(s);
+ if (vlen == 8) return new (C, 2) Replicate8BNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4BNode(s);
+ break;
+ case T_CHAR:
+ if (vlen == 8) return new (C, 2) Replicate8CNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4CNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2CNode(s);
+ break;
+ case T_SHORT:
+ if (vlen == 8) return new (C, 2) Replicate8SNode(s);
+ if (vlen == 4) return new (C, 2) Replicate4SNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2SNode(s);
+ break;
+ case T_INT:
+ if (vlen == 4) return new (C, 2) Replicate4INode(s);
+ if (vlen == 2) return new (C, 2) Replicate2INode(s);
+ break;
+ case T_LONG:
+ if (vlen == 2) return new (C, 2) Replicate2LNode(s);
+ break;
+ case T_FLOAT:
+ if (vlen == 4) return new (C, 2) Replicate4FNode(s);
+ if (vlen == 2) return new (C, 2) Replicate2FNode(s);
+ break;
+ case T_DOUBLE:
+ if (vlen == 2) return new (C, 2) Replicate2DNode(s);
+ break;
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return initial Pack node. Additional operands added with add_opd() calls.
+PackNode* PackNode::make(Compile* C, Node* s, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 2) PackBNode(s);
+ case T_CHAR:
+ return new (C, 2) PackCNode(s);
+ case T_SHORT:
+ return new (C, 2) PackSNode(s);
+ case T_INT:
+ return new (C, 2) PackINode(s);
+ case T_LONG:
+ return new (C, 2) PackLNode(s);
+ case T_FLOAT:
+ return new (C, 2) PackFNode(s);
+ case T_DOUBLE:
+ return new (C, 2) PackDNode(s);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Create a binary tree form for Packs. [lo, hi) (half-open) range
+Node* PackNode::binaryTreePack(Compile* C, int lo, int hi) {
+ int ct = hi - lo;
+ assert(is_power_of_2(ct), "power of 2");
+ int mid = lo + ct/2;
+ Node* n1 = ct == 2 ? in(lo) : binaryTreePack(C, lo, mid);
+ Node* n2 = ct == 2 ? in(lo+1) : binaryTreePack(C, mid, hi );
+ int rslt_bsize = ct * type2aelembytes[elt_basic_type()];
+ if (bottom_type()->is_floatingpoint()) {
+ switch (rslt_bsize) {
+ case 8: return new (C, 3) PackFNode(n1, n2);
+ case 16: return new (C, 3) PackDNode(n1, n2);
+ }
+ } else {
+ assert(bottom_type()->isa_int() || bottom_type()->isa_long(), "int or long");
+ switch (rslt_bsize) {
+ case 2: return new (C, 3) Pack2x1BNode(n1, n2);
+ case 4: return new (C, 3) Pack2x2BNode(n1, n2);
+ case 8: return new (C, 3) PackINode(n1, n2);
+ case 16: return new (C, 3) PackLNode(n1, n2);
+ }
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector operator for the specified scalar operation
+// and vector length. One use is to check if the code generator
+// supports the vector operation.
+int VectorNode::opcode(int sopc, uint vlen, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ if (!(is_power_of_2(vlen) && vlen <= max_vlen(bt)))
+ return 0; // unimplemented
+ switch (sopc) {
+ case Op_AddI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_AddVB;
+ case T_CHAR: return Op_AddVC;
+ case T_SHORT: return Op_AddVS;
+ case T_INT: return Op_AddVI;
+ }
+ ShouldNotReachHere();
+ case Op_AddL:
+ assert(bt == T_LONG, "must be");
+ return Op_AddVL;
+ case Op_AddF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_AddVF;
+ case Op_AddD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_AddVD;
+ case Op_SubI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_SubVB;
+ case T_CHAR: return Op_SubVC;
+ case T_SHORT: return Op_SubVS;
+ case T_INT: return Op_SubVI;
+ }
+ ShouldNotReachHere();
+ case Op_SubL:
+ assert(bt == T_LONG, "must be");
+ return Op_SubVL;
+ case Op_SubF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_SubVF;
+ case Op_SubD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_SubVD;
+ case Op_MulF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_MulVF;
+ case Op_MulD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_MulVD;
+ case Op_DivF:
+ assert(bt == T_FLOAT, "must be");
+ return Op_DivVF;
+ case Op_DivD:
+ assert(bt == T_DOUBLE, "must be");
+ return Op_DivVD;
+ case Op_LShiftI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_LShiftVB;
+ case T_CHAR: return Op_LShiftVC;
+ case T_SHORT: return Op_LShiftVS;
+ case T_INT: return Op_LShiftVI;
+ }
+ ShouldNotReachHere();
+ case Op_URShiftI:
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE: return Op_URShiftVB;
+ case T_CHAR: return Op_URShiftVC;
+ case T_SHORT: return Op_URShiftVS;
+ case T_INT: return Op_URShiftVI;
+ }
+ ShouldNotReachHere();
+ case Op_AndI:
+ case Op_AndL:
+ return Op_AndV;
+ case Op_OrI:
+ case Op_OrL:
+ return Op_OrV;
+ case Op_XorI:
+ case Op_XorL:
+ return Op_XorV;
+
+ case Op_LoadB:
+ case Op_LoadC:
+ case Op_LoadS:
+ case Op_LoadI:
+ case Op_LoadL:
+ case Op_LoadF:
+ case Op_LoadD:
+ return VectorLoadNode::opcode(sopc, vlen);
+
+ case Op_StoreB:
+ case Op_StoreC:
+ case Op_StoreI:
+ case Op_StoreL:
+ case Op_StoreF:
+ case Op_StoreD:
+ return VectorStoreNode::opcode(sopc, vlen);
+ }
+ return 0; // Unimplemented
+}
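
As the comment above notes, one use of opcode() is a pure capability probe; a sketch:

  bool add_int_vector_supported(uint vlen) {
    return VectorNode::opcode(Op_AddI, vlen, TypeInt::INT) != 0;   // 0 means unimplemented at this width
  }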
+
+// Helper for above.
+int VectorLoadNode::opcode(int sopc, uint vlen) {
+ switch (sopc) {
+ case Op_LoadB:
+ switch (vlen) {
+ case 2: return 0; // Unimplemented
+ case 4: return Op_Load4B;
+ case 8: return Op_Load8B;
+ case 16: return Op_Load16B;
+ }
+ break;
+ case Op_LoadC:
+ switch (vlen) {
+ case 2: return Op_Load2C;
+ case 4: return Op_Load4C;
+ case 8: return Op_Load8C;
+ }
+ break;
+ case Op_LoadS:
+ switch (vlen) {
+ case 2: return Op_Load2S;
+ case 4: return Op_Load4S;
+ case 8: return Op_Load8S;
+ }
+ break;
+ case Op_LoadI:
+ switch (vlen) {
+ case 2: return Op_Load2I;
+ case 4: return Op_Load4I;
+ }
+ break;
+ case Op_LoadL:
+ if (vlen == 2) return Op_Load2L;
+ break;
+ case Op_LoadF:
+ switch (vlen) {
+ case 2: return Op_Load2F;
+ case 4: return Op_Load4F;
+ }
+ break;
+ case Op_LoadD:
+ if (vlen == 2) return Op_Load2D;
+ break;
+ }
+ return 0; // Unimplemented
+}
+
+// Helper for above
+int VectorStoreNode::opcode(int sopc, uint vlen) {
+ switch (sopc) {
+ case Op_StoreB:
+ switch (vlen) {
+ case 2: return 0; // Unimplemented
+ case 4: return Op_Store4B;
+ case 8: return Op_Store8B;
+ case 16: return Op_Store16B;
+ }
+ break;
+ case Op_StoreC:
+ switch (vlen) {
+ case 2: return Op_Store2C;
+ case 4: return Op_Store4C;
+ case 8: return Op_Store8C;
+ }
+ break;
+ case Op_StoreI:
+ switch (vlen) {
+ case 2: return Op_Store2I;
+ case 4: return Op_Store4I;
+ }
+ break;
+ case Op_StoreL:
+ if (vlen == 2) return Op_Store2L;
+ break;
+ case Op_StoreF:
+ switch (vlen) {
+ case 2: return Op_Store2F;
+ case 4: return Op_Store4F;
+ }
+ break;
+ case Op_StoreD:
+ if (vlen == 2) return Op_Store2D;
+ break;
+ }
+ return 0; // Unimplemented
+}
+
+// Return the vector version of a scalar operation node.
+VectorNode* VectorNode::make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* opd_t) {
+ int vopc = opcode(sopc, vlen, opd_t);
+
+ switch (vopc) {
+ case Op_AddVB: return new (C, 3) AddVBNode(n1, n2, vlen);
+ case Op_AddVC: return new (C, 3) AddVCNode(n1, n2, vlen);
+ case Op_AddVS: return new (C, 3) AddVSNode(n1, n2, vlen);
+ case Op_AddVI: return new (C, 3) AddVINode(n1, n2, vlen);
+ case Op_AddVL: return new (C, 3) AddVLNode(n1, n2, vlen);
+ case Op_AddVF: return new (C, 3) AddVFNode(n1, n2, vlen);
+ case Op_AddVD: return new (C, 3) AddVDNode(n1, n2, vlen);
+
+ case Op_SubVB: return new (C, 3) SubVBNode(n1, n2, vlen);
+ case Op_SubVC: return new (C, 3) SubVCNode(n1, n2, vlen);
+ case Op_SubVS: return new (C, 3) SubVSNode(n1, n2, vlen);
+ case Op_SubVI: return new (C, 3) SubVINode(n1, n2, vlen);
+ case Op_SubVL: return new (C, 3) SubVLNode(n1, n2, vlen);
+ case Op_SubVF: return new (C, 3) SubVFNode(n1, n2, vlen);
+ case Op_SubVD: return new (C, 3) SubVDNode(n1, n2, vlen);
+
+ case Op_MulVF: return new (C, 3) MulVFNode(n1, n2, vlen);
+ case Op_MulVD: return new (C, 3) MulVDNode(n1, n2, vlen);
+
+ case Op_DivVF: return new (C, 3) DivVFNode(n1, n2, vlen);
+ case Op_DivVD: return new (C, 3) DivVDNode(n1, n2, vlen);
+
+ case Op_LShiftVB: return new (C, 3) LShiftVBNode(n1, n2, vlen);
+ case Op_LShiftVC: return new (C, 3) LShiftVCNode(n1, n2, vlen);
+ case Op_LShiftVS: return new (C, 3) LShiftVSNode(n1, n2, vlen);
+ case Op_LShiftVI: return new (C, 3) LShiftVINode(n1, n2, vlen);
+
+ case Op_URShiftVB: return new (C, 3) URShiftVBNode(n1, n2, vlen);
+ case Op_URShiftVC: return new (C, 3) URShiftVCNode(n1, n2, vlen);
+ case Op_URShiftVS: return new (C, 3) URShiftVSNode(n1, n2, vlen);
+ case Op_URShiftVI: return new (C, 3) URShiftVINode(n1, n2, vlen);
+
+ case Op_AndV: return new (C, 3) AndVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+ case Op_OrV: return new (C, 3) OrVNode (n1, n2, vlen, opd_t->array_element_basic_type());
+ case Op_XorV: return new (C, 3) XorVNode(n1, n2, vlen, opd_t->array_element_basic_type());
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
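
Putting opcode() and make() together, a caller such as a vectorizing pass might widen a scalar AddI like this (a sketch; `a` and `b` are hypothetical nodes producing the vector operands):

  VectorNode* widen_add(Compile* C, Node* a, Node* b, uint vlen) {
    if (VectorNode::opcode(Op_AddI, vlen, TypeInt::INT) == 0)  return NULL;  // unsupported width
    return VectorNode::make(C, Op_AddI, a, b, vlen, TypeInt::INT);           // e.g. AddVINode for vlen == 4
  }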
+
+// Return the vector version of a scalar load node.
+VectorLoadNode* VectorLoadNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen) {
+ int vopc = opcode(opc, vlen);
+
+ switch(vopc) {
+ case Op_Load16B: return new (C, 3) Load16BNode(ctl, mem, adr, atyp);
+ case Op_Load8B: return new (C, 3) Load8BNode(ctl, mem, adr, atyp);
+ case Op_Load4B: return new (C, 3) Load4BNode(ctl, mem, adr, atyp);
+
+ case Op_Load8C: return new (C, 3) Load8CNode(ctl, mem, adr, atyp);
+ case Op_Load4C: return new (C, 3) Load4CNode(ctl, mem, adr, atyp);
+ case Op_Load2C: return new (C, 3) Load2CNode(ctl, mem, adr, atyp);
+
+ case Op_Load8S: return new (C, 3) Load8SNode(ctl, mem, adr, atyp);
+ case Op_Load4S: return new (C, 3) Load4SNode(ctl, mem, adr, atyp);
+ case Op_Load2S: return new (C, 3) Load2SNode(ctl, mem, adr, atyp);
+
+ case Op_Load4I: return new (C, 3) Load4INode(ctl, mem, adr, atyp);
+ case Op_Load2I: return new (C, 3) Load2INode(ctl, mem, adr, atyp);
+
+ case Op_Load2L: return new (C, 3) Load2LNode(ctl, mem, adr, atyp);
+
+ case Op_Load4F: return new (C, 3) Load4FNode(ctl, mem, adr, atyp);
+ case Op_Load2F: return new (C, 3) Load2FNode(ctl, mem, adr, atyp);
+
+ case Op_Load2D: return new (C, 3) Load2DNode(ctl, mem, adr, atyp);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Return the vector version of a scalar store node.
+VectorStoreNode* VectorStoreNode::make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, VectorNode* val,
+ uint vlen) {
+ int vopc = opcode(opc, vlen);
+
+ switch(vopc) {
+ case Op_Store16B: return new (C, 4) Store16BNode(ctl, mem, adr, atyp, val);
+ case Op_Store8B: return new (C, 4) Store8BNode(ctl, mem, adr, atyp, val);
+ case Op_Store4B: return new (C, 4) Store4BNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store8C: return new (C, 4) Store8CNode(ctl, mem, adr, atyp, val);
+ case Op_Store4C: return new (C, 4) Store4CNode(ctl, mem, adr, atyp, val);
+ case Op_Store2C: return new (C, 4) Store2CNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store4I: return new (C, 4) Store4INode(ctl, mem, adr, atyp, val);
+ case Op_Store2I: return new (C, 4) Store2INode(ctl, mem, adr, atyp, val);
+
+ case Op_Store2L: return new (C, 4) Store2LNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store4F: return new (C, 4) Store4FNode(ctl, mem, adr, atyp, val);
+ case Op_Store2F: return new (C, 4) Store2FNode(ctl, mem, adr, atyp, val);
+
+ case Op_Store2D: return new (C, 4) Store2DNode(ctl, mem, adr, atyp, val);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
+
+// Extract a scalar element of vector.
+Node* ExtractNode::make(Compile* C, Node* v, uint position, const Type* opd_t) {
+ BasicType bt = opd_t->array_element_basic_type();
+ assert(position < VectorNode::max_vlen(bt), "pos in range");
+ ConINode* pos = ConINode::make(C, (int)position);
+ switch (bt) {
+ case T_BOOLEAN:
+ case T_BYTE:
+ return new (C, 3) ExtractBNode(v, pos);
+ case T_CHAR:
+ return new (C, 3) ExtractCNode(v, pos);
+ case T_SHORT:
+ return new (C, 3) ExtractSNode(v, pos);
+ case T_INT:
+ return new (C, 3) ExtractINode(v, pos);
+ case T_LONG:
+ return new (C, 3) ExtractLNode(v, pos);
+ case T_FLOAT:
+ return new (C, 3) ExtractFNode(v, pos);
+ case T_DOUBLE:
+ return new (C, 3) ExtractDNode(v, pos);
+ }
+ ShouldNotReachHere();
+ return NULL;
+}
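
And the reverse direction, pulling one lane back out of a vector (a sketch; `v` is a hypothetical vector-producing node):

  Node* first_int_lane(Compile* C, Node* v) {
    return ExtractNode::make(C, v, 0, TypeInt::INT);   // builds an ExtractINode for lane 0
  }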
diff --git a/src/share/vm/opto/vectornode.hpp b/src/share/vm/opto/vectornode.hpp
new file mode 100644
index 000000000..c06386777
--- /dev/null
+++ b/src/share/vm/opto/vectornode.hpp
@@ -0,0 +1,1134 @@
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All Rights Reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
+ * CA 95054 USA or visit www.sun.com if you need additional information or
+ * have any questions.
+ */
+
+//------------------------------VectorNode--------------------------------------
+// Vector Operation
+class VectorNode : public Node {
+ protected:
+ uint _length; // vector length
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+
+ static const Type* vect_type(BasicType elt_bt, uint len);
+ static const Type* vect_type(const Type* elt_type, uint len) {
+ return vect_type(elt_type->array_element_basic_type(), len);
+ }
+
+ public:
+ friend class VectorLoadNode; // For vect_type
+ friend class VectorStoreNode; // ditto.
+
+ VectorNode(Node* n1, uint vlen) : Node(NULL, n1), _length(vlen) {
+ init_flags(Flag_is_Vector);
+ }
+ VectorNode(Node* n1, Node* n2, uint vlen) : Node(NULL, n1, n2), _length(vlen) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ uint length() const { return _length; } // Vector length
+
+ static uint max_vlen(BasicType bt) { // max vector length
+ return (uint)(Matcher::vector_width_in_bytes() / type2aelembytes[bt]);
+ }
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return vect_type(elt_basic_type(), length()); }
+
+ virtual const Type *bottom_type() const { return vect_type(); }
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen, const Type* opd_t);
+
+ static VectorNode* scalar2vector(Compile* C, Node* s, uint vlen, const Type* opd_t);
+
+ static VectorNode* make(Compile* C, int sopc, Node* n1, Node* n2, uint vlen, const Type* elt_t);
+
+};
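
max_vlen() ties the legal vector length to the machine vector width; a sketch of the usual legality check:

  bool fits_in_vector(BasicType bt, uint vlen) {
    return is_power_of_2(vlen) && vlen <= VectorNode::max_vlen(bt);
  }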
+
+//===========================Vector=ALU=Operations====================================
+
+//------------------------------AddVBNode---------------------------------------
+// Vector add byte
+class AddVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ AddVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVCNode---------------------------------------
+// Vector add char
+class AddVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ AddVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVSNode---------------------------------------
+// Vector add short
+class AddVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ AddVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVINode---------------------------------------
+// Vector add int
+class AddVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ AddVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVLNode---------------------------------------
+// Vector add long
+class AddVLNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ AddVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVFNode---------------------------------------
+// Vector add float
+class AddVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ AddVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AddVDNode---------------------------------------
+// Vector add double
+class AddVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ AddVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVBNode---------------------------------------
+// Vector subtract byte
+class SubVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ SubVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVCNode---------------------------------------
+// Vector subtract char
+class SubVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ SubVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVSNode---------------------------------------
+// Vector subtract short
+class SubVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ SubVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVINode---------------------------------------
+// Vector subtract int
+class SubVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ SubVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVLNode---------------------------------------
+// Vector subtract long
+class SubVLNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ SubVLNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVFNode---------------------------------------
+// Vector subtract float
+class SubVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ SubVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------SubVDNode---------------------------------------
+// Vector subtract double
+class SubVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ SubVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------MulVFNode---------------------------------------
+// Vector multiply float
+class MulVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ MulVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------MulVDNode---------------------------------------
+// Vector multiply double
+class MulVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ MulVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------DivVFNode---------------------------------------
+// Vector divide float
+class DivVFNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ DivVFNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------DivVDNode---------------------------------------
+// Vector divide double
+class DivVDNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ DivVDNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVBNode---------------------------------------
+// Vector lshift byte
+class LShiftVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ LShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVCNode---------------------------------------
+// Vector lshift chars
+class LShiftVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ LShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVSNode---------------------------------------
+// Vector lshift shorts
+class LShiftVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ LShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------LShiftVINode---------------------------------------
+// Vector lshift ints
+class LShiftVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ LShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVBNode---------------------------------------
+// Vector urshift bytes
+class URShiftVBNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ URShiftVBNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVCNode---------------------------------------
+// Vector urshift chars
+class URShiftVCNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ URShiftVCNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVSNode---------------------------------------
+// Vector urshift shorts
+class URShiftVSNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ URShiftVSNode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------URShiftVINode---------------------------------------
+// Vector urshift ints
+class URShiftVINode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ URShiftVINode(Node* in1, Node* in2, uint vlen) : VectorNode(in1,in2,vlen) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------AndVNode---------------------------------------
+// Vector and
+class AndVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ AndVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------OrVNode---------------------------------------
+// Vector or
+class OrVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ OrVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------XorVNode---------------------------------------
+// Vector xor
+class XorVNode : public VectorNode {
+ protected:
+ BasicType _bt;
+ virtual BasicType elt_basic_type() const { return _bt; }
+ public:
+ XorVNode(Node* in1, Node* in2, uint vlen, BasicType bt) : VectorNode(in1,in2,vlen), _bt(bt) {}
+ virtual int Opcode() const;
+};
+
+//================================= M E M O R Y ==================================
+
+
+//------------------------------VectorLoadNode--------------------------------------
+// Vector Load from memory
+class VectorLoadNode : public LoadNode {
+ virtual uint size_of() const { return sizeof(*this); }
+
+ protected:
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+ // For use in constructor
+ static const Type* vect_type(const Type* elt_type, uint len) {
+ return VectorNode::vect_type(elt_type, len);
+ }
+
+ public:
+ VectorLoadNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *rt)
+ : LoadNode(c,mem,adr,at,rt) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ virtual uint length() const = 0; // Vector length
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen);
+
+ static VectorLoadNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, uint vlen);
+};
+
+//------------------------------Load16BNode--------------------------------------
+// Vector load of 16 bytes (8bits signed) from memory
+class Load16BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,16)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store16B; }
+ virtual uint length() const { return 16; }
+};
+
+//------------------------------Load8BNode--------------------------------------
+// Vector load of 8 bytes (8bits signed) from memory
+class Load8BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8B; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4BNode--------------------------------------
+// Vector load of 4 bytes (8bits signed) from memory
+class Load4BNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Load4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::BYTE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4B; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load8CNode--------------------------------------
+// Vector load of 8 chars (16bits unsigned) from memory
+class Load8CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8C; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4CNode--------------------------------------
+// Vector load of 4 chars (16bits unsigned) from memory
+class Load4CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4C; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2CNode--------------------------------------
+// Vector load of 2 chars (16bits unsigned) from memory
+class Load2CNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Load2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::CHAR)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2C; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load8SNode--------------------------------------
+// Vector load of 8 shorts (16bits signed) from memory
+class Load8SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load8SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,8)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store8C; }
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Load4SNode--------------------------------------
+// Vector load of 4 shorts (16bits signed) from memory
+class Load4SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load4SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4C; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2SNode--------------------------------------
+// Vector load of 2 shorts (16bits signed) from memory
+class Load2SNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Load2SNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::SHORT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2C; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load4INode--------------------------------------
+// Vector load of 4 integers (32bits signed) from memory
+class Load4INode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Load4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4I; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2INode--------------------------------------
+// Vector load of 2 integers (32bits signed) from memory
+class Load2INode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Load2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeInt *ti = TypeInt::INT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(ti,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2I; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load2LNode--------------------------------------
+// Vector load of 2 longs (64bits signed) from memory
+class Load2LNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Load2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const TypeLong *tl = TypeLong::LONG)
+ : VectorLoadNode(c,mem,adr,at,vect_type(tl,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2L; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load4FNode--------------------------------------
+// Vector load of 4 floats (32bits) from memory
+class Load4FNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Load4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,4)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store4F; }
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Load2FNode--------------------------------------
+// Vector load of 2 floats (32bits) from memory
+class Load2FNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Load2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::FLOAT)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2F; }
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Load2DNode--------------------------------------
+// Vector load of 2 doubles (64bits) from memory
+class Load2DNode : public VectorLoadNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Load2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, const Type *t = Type::DOUBLE)
+ : VectorLoadNode(c,mem,adr,at,vect_type(t,2)) {}
+ virtual int Opcode() const;
+ virtual int store_Opcode() const { return Op_Store2D; }
+ virtual uint length() const { return 2; }
+};
+
+
+//------------------------------VectorStoreNode--------------------------------------
+// Vector Store to memory
+class VectorStoreNode : public StoreNode {
+ virtual uint size_of() const { return sizeof(*this); }
+
+ protected:
+ virtual BasicType elt_basic_type() const = 0; // Vector element basic type
+
+ public:
+ VectorStoreNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : StoreNode(c,mem,adr,at,val) {
+ init_flags(Flag_is_Vector);
+ }
+ virtual int Opcode() const;
+
+ virtual uint length() const = 0; // Vector length
+
+ // Element and vector type
+ const Type* elt_type() const { return Type::get_const_basic_type(elt_basic_type()); }
+ const Type* vect_type() const { return VectorNode::vect_type(elt_basic_type(), length()); }
+
+ virtual uint ideal_reg() const { return Matcher::vector_ideal_reg(); }
+ virtual BasicType memory_type() const { return T_VOID; }
+ virtual int memory_size() const { return length()*type2aelembytes[elt_basic_type()]; }
+
+ // Vector opcode from scalar opcode
+ static int opcode(int sopc, uint vlen);
+
+ static VectorStoreNode* make(Compile* C, int opc, Node* ctl, Node* mem,
+ Node* adr, const TypePtr* atyp, VectorNode* val,
+ uint vlen);
+};
+
+//------------------------------Store16BNode--------------------------------------
+// Vector store of 16 bytes (8bits signed) to memory
+class Store16BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store16BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 16; }
+};
+
+//------------------------------Store8BNode--------------------------------------
+// Vector store of 8 bytes (8bits signed) to memory
+class Store8BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store8BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Store4BNode--------------------------------------
+// Vector store of 4 bytes (8bits signed) to memory
+class Store4BNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Store4BNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store8CNode--------------------------------------
+// Vector store of 8 chars (16bits signed/unsigned) to memory
+class Store8CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store8CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 8; }
+};
+
+//------------------------------Store4CNode--------------------------------------
+// Vector store of 4 chars (16bits signed/unsigned) to memory
+class Store4CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store4CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2CNode--------------------------------------
+// Vector store of 2 chars (16bits signed/unsigned) to memory
+class Store2CNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Store2CNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store4INode--------------------------------------
+// Vector store of 4 integers (32bits signed) to memory
+class Store4INode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Store4INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2INode--------------------------------------
+// Vector store of 2 integers (32bits signed) to memory
+class Store2INode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Store2INode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store2LNode--------------------------------------
+// Vector store of 2 longs (64bits signed) to memory
+class Store2LNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Store2LNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store4FNode--------------------------------------
+// Vector store of 4 floats (32bits) to memory
+class Store4FNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Store4FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 4; }
+};
+
+//------------------------------Store2FNode--------------------------------------
+// Vector store of 2 floats (32bits) to memory
+class Store2FNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Store2FNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//------------------------------Store2DNode--------------------------------------
+// Vector store of 2 doubles (64bits) to memory
+class Store2DNode : public VectorStoreNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Store2DNode(Node* c, Node* mem, Node* adr, const TypePtr* at, Node* val)
+ : VectorStoreNode(c,mem,adr,at,val) {}
+ virtual int Opcode() const;
+ virtual uint length() const { return 2; }
+};
+
+//=========================Promote_Scalar_to_Vector====================================
+
+//------------------------------Replicate16BNode---------------------------------------
+// Replicate byte scalar to be vector of 16 bytes
+class Replicate16BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate16BNode(Node* in1) : VectorNode(in1, 16) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8BNode---------------------------------------
+// Replicate byte scalar to be vector of 8 bytes
+class Replicate8BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate8BNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4BNode---------------------------------------
+// Replicate byte scalar to be vector of 4 bytes
+class Replicate4BNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Replicate4BNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8CNode---------------------------------------
+// Replicate char scalar to be vector of 8 chars
+class Replicate8CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate8CNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4CNode---------------------------------------
+// Replicate char scalar to be vector of 4 chars
+class Replicate4CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate4CNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2CNode---------------------------------------
+// Replicate char scalar to be vector of 2 chars
+class Replicate2CNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Replicate2CNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate8SNode---------------------------------------
+// Replicate short scalar to be vector of 8 shorts
+class Replicate8SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate8SNode(Node* in1) : VectorNode(in1, 8) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4SNode---------------------------------------
+// Replicate short scalar to be vector of 4 shorts
+class Replicate4SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate4SNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2SNode---------------------------------------
+// Replicate short scalar to be vector of 2 shorts
+class Replicate2SNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ Replicate2SNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4INode---------------------------------------
+// Replicate int scalar to be vector of 4 ints
+class Replicate4INode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Replicate4INode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2INode---------------------------------------
+// Replicate int scalar to be vector of 2 ints
+class Replicate2INode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ Replicate2INode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2LNode---------------------------------------
+// Replicate long scalar to be vector of 2 longs
+class Replicate2LNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ Replicate2LNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate4FNode---------------------------------------
+// Replicate float scalar to be vector of 4 floats
+class Replicate4FNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Replicate4FNode(Node* in1) : VectorNode(in1, 4) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2FNode---------------------------------------
+// Replicate float scalar to be vector of 2 floats
+class Replicate2FNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ Replicate2FNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------Replicate2DNode---------------------------------------
+// Replicate double scalar to be vector of 2 doubles
+class Replicate2DNode : public VectorNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ Replicate2DNode(Node* in1) : VectorNode(in1, 2) {}
+ virtual int Opcode() const;
+};
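+
+// Illustrative sketch only: a Replicate node takes the scalar as its one
+// data input and broadcasts it to every lane of the result.  Broadcasting
+// an int scalar 's' across four lanes would look roughly like
+//
+//   Node* r = new (C, 2) Replicate4INode(s);
+//
+// (the edge count of 2 assumes VectorNode's control-plus-one-input layout;
+// the element type and lane count are fixed by the concrete class,
+// T_INT and 4 in this case).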
+
+//========================Pack_Scalars_into_a_Vector==============================
+
+//------------------------------PackNode---------------------------------------
+// Pack parent class (not for code generation).
+class PackNode : public VectorNode {
+ public:
+ PackNode(Node* in1) : VectorNode(in1, 1) {}
+ PackNode(Node* in1, Node* n2) : VectorNode(in1, n2, 2) {}
+ virtual int Opcode() const;
+
+ void add_opd(Node* n) {
+ add_req(n);
+ _length++;
+ assert(_length == req() - 1, "vector length matches edge count");
+ }
+
+ // Create a binary tree form for Packs. [lo, hi) (half-open) range
+ Node* binaryTreePack(Compile* C, int lo, int hi);
+
+ static PackNode* make(Compile* C, Node* s, const Type* elt_t);
+};
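+
+// Illustrative sketch only: add_opd() appends one more operand edge and
+// keeps _length in step with req(), which is what the assert above checks.
+// binaryTreePack() then combines the operands in the half-open edge range
+// [lo, hi) pairwise, so a flat pack of four operands p0..p3 would come out
+// as a balanced tree of two-input packs:
+//
+//   pack(pack(p0, p1), pack(p2, p3))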
+
+//------------------------------PackBNode---------------------------------------
+// Pack byte scalars into a vector
+class PackBNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ PackBNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackCNode---------------------------------------
+// Pack char scalars into a vector
+class PackCNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ PackCNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackSNode---------------------------------------
+// Pack short scalars into a vector
+class PackSNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_SHORT; }
+ public:
+ PackSNode(Node* in1) : PackNode(in1) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackINode---------------------------------------
+// Pack integer scalars into a vector
+class PackINode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_INT; }
+ public:
+ PackINode(Node* in1) : PackNode(in1) {}
+ PackINode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackLNode---------------------------------------
+// Pack long scalars into a vector
+class PackLNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_LONG; }
+ public:
+ PackLNode(Node* in1) : PackNode(in1) {}
+ PackLNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackFNode---------------------------------------
+// Pack float scalars into a vector
+class PackFNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_FLOAT; }
+ public:
+ PackFNode(Node* in1) : PackNode(in1) {}
+ PackFNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+//------------------------------PackDNode---------------------------------------
+// Pack double scalars into a vector
+class PackDNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_DOUBLE; }
+ public:
+ PackDNode(Node* in1) : PackNode(in1) {}
+ PackDNode(Node* in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+};
+
+// The Pack2xN nodes assist code generation. They are created from
+// Pack4C, etc. nodes in final_graph_reshape in the form of a
+// balanced, binary tree.
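+// For example (illustrative only), a pack of four byte operands b0..b3
+// could come out of that reshape as
+//
+//   Pack2x2B( Pack2x1B(b0, b1), Pack2x1B(b2, b3) )
+//
+// where each inner node produces a 2-byte half and the outer node joins
+// the halves into the final 4-byte vector.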
+
+//------------------------------Pack2x1BNode-----------------------------------------
+// Pack 2 1-byte integers into a vector of 2 bytes
+class Pack2x1BNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_BYTE; }
+ public:
+ Pack2x1BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------Pack2x2BNode---------------------------------------
+// Pack 2 2-byte integers into a vector of 4 bytes
+class Pack2x2BNode : public PackNode {
+ protected:
+ virtual BasicType elt_basic_type() const { return T_CHAR; }
+ public:
+ Pack2x2BNode(Node *in1, Node* in2) : PackNode(in1, in2) {}
+ virtual int Opcode() const;
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//========================Extract_Scalar_from_Vector===============================
+
+//------------------------------ExtractNode---------------------------------------
+// Extract a scalar from a vector at position "pos"
+class ExtractNode : public Node {
+ public:
+ ExtractNode(Node* src, ConINode* pos) : Node(NULL, src, (Node*)pos) {
+ assert(in(2)->get_int() >= 0, "non-negative position constant");
+ }
+ virtual int Opcode() const;
+ uint pos() const { return in(2)->get_int(); }
+
+ static Node* make(Compile* C, Node* v, uint position, const Type* opd_t);
+};
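+
+// Illustrative sketch only: used directly, the make() factory declared
+// above would pull lane 2 out of an int vector 'vec' roughly as
+//
+//   Node* e = ExtractNode::make(C, vec, 2, TypeInt::INT);
+//
+// which is expected to hand back an ExtractINode whose pos() is 2.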
+
+//------------------------------ExtractBNode---------------------------------------
+// Extract a byte from a vector at position "pos"
+class ExtractBNode : public ExtractNode {
+ public:
+ ExtractBNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractCNode---------------------------------------
+// Extract a char from a vector at position "pos"
+class ExtractCNode : public ExtractNode {
+ public:
+ ExtractCNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractSNode---------------------------------------
+// Extract a short from a vector at position "pos"
+class ExtractSNode : public ExtractNode {
+ public:
+ ExtractSNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractINode---------------------------------------
+// Extract an int from a vector at position "pos"
+class ExtractINode : public ExtractNode {
+ public:
+ ExtractINode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeInt::INT; }
+ virtual uint ideal_reg() const { return Op_RegI; }
+};
+
+//------------------------------ExtractLNode---------------------------------------
+// Extract a long from a vector at position "pos"
+class ExtractLNode : public ExtractNode {
+ public:
+ ExtractLNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return TypeLong::LONG; }
+ virtual uint ideal_reg() const { return Op_RegL; }
+};
+
+//------------------------------ExtractFNode---------------------------------------
+// Extract a float from a vector at position "pos"
+class ExtractFNode : public ExtractNode {
+ public:
+ ExtractFNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::FLOAT; }
+ virtual uint ideal_reg() const { return Op_RegF; }
+};
+
+//------------------------------ExtractDNode---------------------------------------
+// Extract a double from a vector at position "pos"
+class ExtractDNode : public ExtractNode {
+ public:
+ ExtractDNode(Node* src, ConINode* pos) : ExtractNode(src, pos) {}
+ virtual int Opcode() const;
+ virtual const Type *bottom_type() const { return Type::DOUBLE; }
+ virtual uint ideal_reg() const { return Op_RegD; }
+};